Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64.h42
-rw-r--r--lib/Target/AArch64/AArch64.td70
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp347
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.h80
-rw-r--r--lib/Target/AArch64/AArch64BranchFixupPass.cpp600
-rw-r--r--lib/Target/AArch64/AArch64CallingConv.td196
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp686
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h108
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp415
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp2976
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h247
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td961
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp822
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h112
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td5109
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp140
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.cpp18
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h149
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp171
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h76
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td203
-rw-r--r--lib/Target/AArch64/AArch64Schedule.td10
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp25
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h32
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp43
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h54
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp81
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h69
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.cpp24
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h31
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp2188
-rw-r--r--lib/Target/AArch64/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/AsmParser/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/AsmParser/Makefile15
-rw-r--r--lib/Target/AArch64/CMakeLists.txt36
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp787
-rw-r--r--lib/Target/AArch64/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/Disassembler/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/Disassembler/Makefile16
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp408
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h172
-rw-r--r--lib/Target/AArch64/InstPrinter/CMakeLists.txt8
-rw-r--r--lib/Target/AArch64/InstPrinter/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/InstPrinter/Makefile15
-rw-r--r--lib/Target/AArch64/LLVMBuild.txt36
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp585
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp292
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp160
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h27
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h113
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp41
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h27
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp513
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp178
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h167
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp194
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h65
-rw-r--r--lib/Target/AArch64/MCTargetDesc/CMakeLists.txt13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/AArch64/Makefile30
-rw-r--r--lib/Target/AArch64/README.txt2
-rw-r--r--lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp24
-rw-r--r--lib/Target/AArch64/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/AArch64/TargetInfo/LLVMBuild.txt24
-rw-r--r--lib/Target/AArch64/TargetInfo/Makefile15
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp819
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h784
-rw-r--r--lib/Target/AArch64/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/AArch64/Utils/LLVMBuild.txt23
-rw-r--r--lib/Target/AArch64/Utils/Makefile15
-rw-r--r--lib/Target/ARM/ARM.td5
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp43
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h8
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp2
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp85
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h7
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp2
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp93
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp55
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h5
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp141
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp218
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td42
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td9
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp1
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td3
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp110
-rw-r--r--lib/Target/ARM/ARMSubtarget.h10
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h4
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp250
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp161
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp34
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp229
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h112
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp44
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h4
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp68
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h8
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp70
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/Hexagon.h8
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp183
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h5
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp1665
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp124
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp34
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h2
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td412
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td58
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp443
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td298
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td2609
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp117
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h12
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td24
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td14
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp387
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp44
-rw-r--r--lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h17
-rw-r--r--lib/Target/Hexagon/InstPrinter/LLVMBuild.txt2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h88
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp175
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h100
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp4
-rw-r--r--lib/Target/LLVMBuild.txt2
-rw-r--r--lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp6
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp39
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.h4
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp33
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td4
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp58
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp2
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp66
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp18
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td4
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp79
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h7
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp583
-rw-r--r--lib/Target/Mips/Disassembler/LLVMBuild.txt2
-rw-r--r--lib/Target/Mips/Disassembler/Makefile2
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp14
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp10
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp22
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp89
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h43
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp13
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp80
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsReginfo.h31
-rw-r--r--lib/Target/Mips/Mips.td3
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp19
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h4
-rw-r--r--lib/Target/Mips/Mips16InstrFormats.td111
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp150
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h26
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td397
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp34
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h4
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td167
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp61
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h1
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp136
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp316
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp36
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp806
-rw-r--r--lib/Target/Mips/MipsISelLowering.h85
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td78
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td9
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td313
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp6
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp16
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h16
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp35
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h5
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td45
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp80
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h5
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp29
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp28
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.h4
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp5
-rw-r--r--lib/Target/Mips/MipsSubtarget.h17
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp14
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.h2
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTX.h1
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp49
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp11
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h4
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp809
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h7
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp472
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h21
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp40
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td96
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td145
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp183
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h10
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td44
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp1
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp1239
-rw-r--r--lib/Target/NVPTX/gen-register-defs.py202
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp19
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp10
-rw-r--r--lib/Target/PowerPC/PPC.h26
-rw-r--r--lib/Target/PowerPC/PPC.td44
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp48
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp11
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp14
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td62
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp84
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp256
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp158
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td2
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td6
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp15
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp7
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h9
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp83
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h8
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td144
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp7
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h9
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp9
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h3
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp236
-rw-r--r--lib/Target/R600/AMDGPU.h4
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp13
-rw-r--r--lib/Target/R600/AMDGPUCodeEmitter.h49
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.cpp122
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.h (renamed from lib/Target/R600/AMDILFrameLowering.h)6
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp15
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h25
-rw-r--r--lib/Target/R600/AMDGPUIndirectAddressing.cpp344
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp12
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h60
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td8
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td99
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.cpp24
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h3
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.td11
-rw-r--r--lib/Target/R600/AMDGPUStructurizeCFG.cpp843
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h2
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp13
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h2
-rw-r--r--lib/Target/R600/AMDIL.h20
-rw-r--r--lib/Target/R600/AMDILDevice.h2
-rw-r--r--lib/Target/R600/AMDILFrameLowering.cpp47
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp222
-rw-r--r--lib/Target/R600/AMDILISelLowering.cpp9
-rw-r--r--lib/Target/R600/AMDILInstrInfo.td1
-rw-r--r--lib/Target/R600/AMDILIntrinsics.td10
-rw-r--r--lib/Target/R600/AMDILPeepholeOptimizer.cpp10
-rw-r--r--lib/Target/R600/CMakeLists.txt6
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp48
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h11
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp87
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp321
-rw-r--r--lib/Target/R600/Processors.td1
-rw-r--r--lib/Target/R600/R600Defines.h18
-rw-r--r--lib/Target/R600/R600ExpandSpecialInstrs.cpp199
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp691
-rw-r--r--lib/Target/R600/R600ISelLowering.h8
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp140
-rw-r--r--lib/Target/R600/R600InstrInfo.h32
-rw-r--r--lib/Target/R600/R600Instructions.td524
-rw-r--r--lib/Target/R600/R600Intrinsics.td23
-rw-r--r--lib/Target/R600/R600LowerConstCopy.cpp222
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.cpp18
-rw-r--r--lib/Target/R600/R600MachineFunctionInfo.h11
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp28
-rw-r--r--lib/Target/R600/R600RegisterInfo.td75
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp2
-rw-r--r--lib/Target/R600/SIISelLowering.cpp409
-rw-r--r--lib/Target/R600/SIISelLowering.h24
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp353
-rw-r--r--lib/Target/R600/SIInstrFormats.td470
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp73
-rw-r--r--lib/Target/R600/SIInstrInfo.h43
-rw-r--r--lib/Target/R600/SIInstrInfo.td742
-rw-r--r--lib/Target/R600/SIInstructions.td1106
-rw-r--r--lib/Target/R600/SIIntrinsics.td8
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp126
-rw-r--r--lib/Target/R600/SILowerLiteralConstants.cpp108
-rw-r--r--lib/Target/R600/SIRegisterInfo.td226
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp5
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp16
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h4
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp29
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td2
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp34
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h7
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp505
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp135
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp14
-rw-r--r--lib/Target/X86/README-SSE.txt9
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp18
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h2
-rw-r--r--lib/Target/X86/X86.td2
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp47
-rw-r--r--lib/Target/X86/X86AsmPrinter.h8
-rw-r--r--lib/Target/X86/X86CallingConv.td3
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp2
-rw-r--r--lib/Target/X86/X86CompilationCallback_Win64.asm4
-rw-r--r--lib/Target/X86/X86FastISel.cpp100
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp309
-rw-r--r--lib/Target/X86/X86FrameLowering.h6
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp6
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp890
-rw-r--r--lib/Target/X86/X86ISelLowering.h20
-rw-r--r--lib/Target/X86/X86Instr3DNow.td6
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td50
-rw-r--r--lib/Target/X86/X86InstrCompiler.td21
-rw-r--r--lib/Target/X86/X86InstrFMA.td14
-rw-r--r--lib/Target/X86/X86InstrFormats.td2
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td2
-rw-r--r--lib/Target/X86/X86InstrInfo.td20
-rw-r--r--lib/Target/X86/X86InstrSSE.td297
-rw-r--r--lib/Target/X86/X86InstrSystem.td8
-rw-r--r--lib/Target/X86/X86JITInfo.cpp34
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp3
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp77
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp217
-rw-r--r--lib/Target/X86/X86RegisterInfo.h7
-rw-r--r--lib/Target/X86/X86Schedule.td5
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td1
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp8
-rw-r--r--lib/Target/X86/X86Subtarget.cpp111
-rw-r--r--lib/Target/X86/X86Subtarget.h33
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp24
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp6
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp94
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp10
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp527
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp52
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h4
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp13
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp120
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h4
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td142
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td768
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp74
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h7
370 files changed, 40649 insertions, 11923 deletions
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
new file mode 100644
index 0000000..4de4faa
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.h
@@ -0,0 +1,42 @@
+//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AArch64 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_H
+#define LLVM_TARGET_AARCH64_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64AsmPrinter;
+class FunctionPass;
+class AArch64TargetMachine;
+class MachineInstr;
+class MCInst;
+
+FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
+
+FunctionPass *createAArch64BranchFixupPass();
+
+void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AArch64AsmPrinter &AP);
+
+
+}
+
+#endif
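
Editor's note (not part of the patch): the two factory functions declared in AArch64.h are the hooks a target machine's pass configuration normally calls. The sketch below assumes the standard TargetPassConfig pattern of this LLVM era; the class name AArch64PassConfig, the included headers, and the exact hook bodies are illustrative guesses, and the commit's real wiring lives in AArch64TargetMachine.cpp further down the diffstat.

  // Illustrative sketch only: how a backend of this vintage typically consumes
  // createAArch64ISelDAG and createAArch64BranchFixupPass. Hypothetical class;
  // see AArch64TargetMachine.cpp in this patch for the authoritative version.
  #include "AArch64.h"
  #include "AArch64TargetMachine.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/PassManager.h"

  namespace {
  class AArch64PassConfig : public llvm::TargetPassConfig {
  public:
    AArch64PassConfig(llvm::AArch64TargetMachine *TM,
                      llvm::PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

    llvm::AArch64TargetMachine &getAArch64TargetMachine() const {
      return getTM<llvm::AArch64TargetMachine>();
    }

    virtual bool addInstSelector() {
      // DAG-to-DAG instruction selection, declared in AArch64.h above.
      addPass(llvm::createAArch64ISelDAG(getAArch64TargetMachine(),
                                         getOptLevel()));
      return false;
    }

    virtual bool addPreEmitPass() {
      // Branch fixup must run after block layout, when offsets are final.
      addPass(llvm::createAArch64BranchFixupPass());
      return true;
    }
  };
  } // end anonymous namespace
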
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
new file mode 100644
index 0000000..e17052b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64.td
@@ -0,0 +1,70 @@
+//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// AArch64 Subtarget features.
+//
+
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable Advanced SIMD instructions">;
+
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable cryptographic instructions">;
+
+//===----------------------------------------------------------------------===//
+// AArch64 Processors
+//
+
+include "AArch64Schedule.td"
+
+def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+
+include "AArch64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrInfo.td"
+
+def AArch64InstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+
+def A64InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def AArch64 : Target {
+ let InstructionSet = AArch64InstrInfo;
+ let AssemblyWriters = [A64InstPrinter];
+}
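
Editor's note (assumption-level sketch, not from the patch): each SubtargetFeature record above becomes a named boolean on the tblgen-generated subtarget, which the rest of the backend queries through accessors. The real interface is AArch64Subtarget.h in this patch; the stand-alone mock below only mirrors the HasNEON/HasCrypto fields named in the records.

  // Minimal, hypothetical stand-in for the fields tblgen derives from
  // FeatureNEON and FeatureCrypto; the patch's AArch64Subtarget.h is the
  // real interface.
  class MockAArch64Subtarget {
    bool HasNEON;    // enabled via -mattr=+neon   (FeatureNEON)
    bool HasCrypto;  // enabled via -mattr=+crypto (FeatureCrypto)
  public:
    MockAArch64Subtarget(bool NEON, bool Crypto)
      : HasNEON(NEON), HasCrypto(Crypto) {}
    bool hasNEON() const { return HasNEON; }
    bool hasCrypto() const { return HasCrypto; }
  };

Instruction patterns gated on such features are then typically guarded with TableGen predicates of the usual Requires<[...]> form in the instruction definitions.
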
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
new file mode 100644
index 0000000..47ebb82
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64AsmPrinter.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // See emitFrameIndexDebugValue in InstrInfo for where this instruction is
+ // expected to be created.
+ assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+/// Try to print a floating-point register as if it belonged to a specified
+/// register-class. For example the inline asm operand modifier "b" requires its
+/// argument to be printed as "bN".
+static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ if (!MO.isReg())
+ return true;
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
+/// with the obvious type and an immediate 0 as either wzr or xzr.
+static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
+ const TargetRegisterInfo *TRI,
+ const TargetRegisterClass &RegClass,
+ raw_ostream &O) {
+ char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+
+ if (MO.isImm() && MO.getImm() == 0) {
+ O << Prefix << "zr";
+ return false;
+ } else if (MO.isReg()) {
+ if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
+ O << (Prefix == 'x' ? "sp" : "wsp");
+ return false;
+ }
+
+ for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
+ if (RegClass.contains(*AR)) {
+ O << AArch64InstPrinter::getRegisterName(*AR);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O) {
+ StringRef Name;
+ StringRef Modifier;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for symbolic address constraint");
+ case MachineOperand::MO_GlobalAddress:
+ Name = Mang->getSymbol(MO.getGlobal())->getName();
+
+ // Global variables may be accessed either via a GOT or in various fun and
+ // interesting TLS-model specific ways. Set the prefix modifier as
+ // appropriate here.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
+ Reloc::Model RelocM = TM.getRelocationModel();
+ if (GV->isThreadLocal()) {
+ switch (TM.getTLSModel(GV)) {
+ case TLSModel::GeneralDynamic:
+ Modifier = "tlsdesc";
+ break;
+ case TLSModel::LocalDynamic:
+ Modifier = "dtprel";
+ break;
+ case TLSModel::InitialExec:
+ Modifier = "gottprel";
+ break;
+ case TLSModel::LocalExec:
+ Modifier = "tprel";
+ break;
+ }
+ } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ Modifier = "got";
+ }
+ }
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ Name = MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ Name = GetCPISymbol(MO.getIndex())->getName();
+ break;
+ }
+
+ // Some instructions (notably ADRP) don't take the # prefix for
+ // immediates. Only print it if asked to.
+ if (PrintImmediatePrefix)
+ O << '#';
+
+ // Only need the joining "_" if both the prefix and the suffix are
+ // non-null. This little block simply takes care of the four possible
+ // combinations involved there.
+ if (Modifier == "" && Suffix == "")
+ O << Name;
+ else if (Modifier == "" && Suffix != "")
+ O << ":" << Suffix << ':' << Name;
+ else if (Modifier != "" && Suffix == "")
+ O << ":" << Modifier << ':' << Name;
+ else
+ O << ":" << Modifier << '_' << Suffix << ':' << Name;
+
+ return false;
+}
+
+bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ if (!ExtraCode || !ExtraCode[0]) {
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour which we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
+ }
+
+ // We have a real modifier to handle.
+ switch(ExtraCode[0]) {
+ default:
+ // See if this is a generic operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "#" before an immediate operand.
+ if (!MI->getOperand(OpNum).isImm())
+ return true;
+ O << MI->getOperand(OpNum).getImm();
+ return false;
+ case 'w':
+ // Output 32-bit general register operand, constant zero as wzr, or stack
+ // pointer as wsp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O);
+ case 'x':
+ // Output 64-bit general register operand, constant zero as xzr, or stack
+ // pointer as sp. Ignored when used with other operand types.
+ return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O);
+ case 'H':
+ // Output higher numbered of a 64-bit general register pair
+ case 'Q':
+ // Output least significant register of a 64-bit general register pair
+ case 'R':
+ // Output most significant register of a 64-bit general register pair
+
+ // FIXME note: these three operand modifiers will require, to some extent,
+ // adding a paired GPR64 register class. Initial investigation suggests that
+ // assertions are hit unless it has a type and is made legal for that type
+ // in ISelLowering. After that step is made, the number of modifications
+ // needed explodes (operation legality, calling conventions, stores, reg
+ // copies ...).
+ llvm_unreachable("FIXME: Unimplemented register pairs");
+ case 'b':
+ // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR8RegClass, O);
+ case 'h':
+ // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR16RegClass, O);
+ case 's':
+ // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR32RegClass, O);
+ case 'd':
+ // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR64RegClass, O);
+ case 'q':
+ // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
+ return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::FPR128RegClass, O);
+ case 'A':
+ // Output symbolic address with appropriate relocation modifier (also
+ // suitable for ADRP).
+ return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ case 'L':
+ // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
+ // modifier.
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ case 'G':
+ // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
+ // modifier (currently only for TLS local exec).
+ return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ }
+
+
+}
+
+bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ // Currently both the memory constraints (m and Q) behave the same and amount
+ // to the address as a single register. In future, we may allow "m" to provide
+ // both a base and an offset.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline assembly memory operand");
+ O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ return false;
+}
+
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg());
+ OS << '+' << MI->getOperand(1).getImm();
+ OS << ']';
+ OS << "+" << MI->getOperand(NOps - 2).getImm();
+}
+
+
+#include "AArch64GenMCPseudoLowering.inc"
+
+void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
+ return;
+
+ switch (MI->getOpcode()) {
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ }
+
+ MCInst TmpInst;
+ LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmPrinter() {
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64Target);
+}
+
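
Editor's note (an assumed user-level example, not part of the patch): the 'w' case of PrintAsmOperand above is what makes a %w operand modifier in GCC-style inline assembly print as a 32-bit register name, with a constant zero printed as wzr.

  // Hypothetical caller-side example compiled for AArch64; the backend's
  // PrintAsmOperand handles the "%w" modifiers when emitting the .s file.
  long add_low32(long a, long b) {
    long r;
    __asm__("add %w0, %w1, %w2" : "=r"(r) : "r"(a), "r"(b));
    return r;
  }
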
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
new file mode 100644
index 0000000..af0c9fe
--- /dev/null
+++ b/lib/Target/AArch64/AArch64AsmPrinter.h
@@ -0,0 +1,80 @@
+// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64ASMPRINTER_H
+#define LLVM_AARCH64ASMPRINTER_H
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ public:
+ explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ }
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const;
+
+ void EmitInstruction(const MachineInstr *MI);
+ void EmitEndOfAsmFile(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ /// printSymbolicAddress - Given some kind of reasonably bare symbolic
+ /// reference, print out the appropriate asm string to represent it. If
+ /// appropriate, a relocation-specifier will be produced, composed of a
+ /// general class derived from the MO parameter and an instruction-specific
+ /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
+ /// given.
+ bool printSymbolicAddress(const MachineOperand &MO,
+ bool PrintImmediatePrefix,
+ StringRef Suffix, raw_ostream &O);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual const char *getPassName() const {
+ return "AArch64 Assembly Printer";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+};
+} // end namespace llvm
+
+#endif
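
Editor's note: the printSymbolicAddress contract above ("a general class derived from the MO parameter and an instruction-specific suffix") reduces to a small joining rule. The stand-alone sketch below restates it outside the AsmPrinter (function name hypothetical); it is assumed to mirror the four-way branch in AArch64AsmPrinter.cpp earlier in this diff.

  // Sketch of the relocation-specifier composition, assumed equivalent to the
  // logic in AArch64AsmPrinter::printSymbolicAddress.
  #include <string>

  static std::string composeSpecifier(const std::string &Modifier, // e.g. "got"
                                      const std::string &Suffix,   // e.g. "lo12"
                                      const std::string &Name) {   // symbol name
    if (Modifier.empty() && Suffix.empty())
      return Name;                                      // var
    if (Modifier.empty())
      return ":" + Suffix + ":" + Name;                 // :lo12:var
    if (Suffix.empty())
      return ":" + Modifier + ":" + Name;               // :got:var
    return ":" + Modifier + "_" + Suffix + ":" + Name;  // :got_lo12:var
  }
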
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
new file mode 100644
index 0000000..71233ba
--- /dev/null
+++ b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
@@ -0,0 +1,600 @@
+//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that fixes AArch64 branches which have ended up out
+// of range for their immediate operands.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-branch-fixup"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+
+/// Return the worst case padding that could result from unknown offset bits.
+/// This does not include alignment padding caused by known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+namespace {
+ /// Due to limited PC-relative displacements, conditional branches to distant
+ /// blocks may need converting into an unconditional equivalent. For example:
+ /// tbz w1, #0, far_away
+ /// becomes
+ /// tbnz w1, #0, skip
+ /// b far_away
+ /// skip:
+ class AArch64BranchFixup : public MachineFunctionPass {
+ /// Information about the offset and size of a single basic block.
+ struct BasicBlockInfo {
+ /// Distance from the beginning of the function to the beginning of this
+ /// basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size of the basic block in bytes. If the block contains inline
+ /// assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// The number of low bits in Offset that are known to be exact. The
+ /// remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// When non-zero, the block contains instructions (inline asm) of unknown
+ /// size. The real size may be smaller than Size bytes by a multiple of 1
+ /// << Unalign.
+ uint8_t Unalign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = CountTrailingZeros_32(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ if (!LogAlign)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LogAlign, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(LogAlign, internalKnownBits());
+ }
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// One per immediate branch, keeping the machine instruction pointer,
+ /// conditional or unconditional, the max displacement, and (if IsCond is
+ /// true) the corresponding inverted branch opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned OffsetBits : 31;
+ bool IsCond : 1;
+ ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
+ : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
+ };
+
+ /// Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ MachineFunction *MF;
+ const AArch64InstrInfo *TII;
+ public:
+ static char ID;
+ AArch64BranchFixup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "AArch64 branch fixup pass";
+ }
+
+ private:
+ void initializeFunctionInfo();
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned OffsetBits);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify();
+ };
+ char AArch64BranchFixup::ID = 0;
+}
+
+/// check BBOffsets
+void AArch64BranchFixup::verify() {
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ unsigned MBBId = MBB->getNumber();
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64BranchFixup::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
+
+/// Returns an instance of the branch fixup pass.
+FunctionPass *llvm::createAArch64BranchFixupPass() {
+ return new AArch64BranchFixup();
+}
+
+bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ DEBUG(dbgs() << "***** AArch64BranchFixup ******");
+ TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block and location of each immediate branch.
+ initializeFunctionInfo();
+
+ // Iteratively fix up branches until there is no change.
+ unsigned NoBRIters = 0;
+ bool MadeChange = false;
+ while (true) {
+ DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ ImmBranches.clear();
+
+ return MadeChange;
+}
+
+/// Return true if the specified basic block can fallthrough into the block
+/// immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// Do the initial scan of the function, building up information about the sizes
+/// of each block, and each immediate branch.
+void AArch64BranchFixup::initializeFunctionInfo() {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool IsCond = false;
+
+ // The offsets encoded in instructions here scale by the instruction
+ // size (4 bytes), effectively increasing their range by 2 bits.
+ unsigned Bits = 0;
+ switch (Opc) {
+ default:
+ continue; // Ignore other JT branches
+ case AArch64::TBZxii:
+ case AArch64::TBZwii:
+ case AArch64::TBNZxii:
+ case AArch64::TBNZwii:
+ IsCond = true;
+ Bits = 14 + 2;
+ break;
+ case AArch64::Bcc:
+ case AArch64::CBZx:
+ case AArch64::CBZw:
+ case AArch64::CBNZx:
+ case AArch64::CBNZw:
+ IsCond = true;
+ Bits = 19 + 2;
+ break;
+ case AArch64::Bimm:
+ Bits = 26 + 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
+ }
+ }
+ }
+}
+
+/// Compute the size and some alignment information for MBB. This function
+/// updates BBInfo directly.
+void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->getInstSizeInBytes(*I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = 2;
+ }
+}
+
+/// Return the current offset of the specified machine instruction from the
+/// start of the function. This offset changes as stuff is moved around inside
+/// the function.
+unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *
+AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+/// Returns true if the distance between specific MI and specific BB can fit in
+/// MI's displacement field.
+bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned OffsetBits) {
+ int64_t BrOffset = getOffsetOf(MI);
+ int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " bits available=" << OffsetBits
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ return isIntN(OffsetBits, DestOffset - BrOffset);
+}
+
+/// Fix up an immediate branch whose destination is too far away to fit in its
+/// displacement field.
+bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isMBB()) {
+ DestBB = MI->getOperand(i).getMBB();
+ break;
+ }
+ }
+ assert(DestBB && "Branch with no destination BB?");
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.OffsetBits))
+ return false;
+
+ assert(Br.IsCond && "Only conditional branches should need fixup");
+ return fixupConditionalBr(Br);
+}
+
+/// Fix up a conditional branch whose destination is too far away to fit in its
+/// displacement field. It is converted to an inverse conditional branch + an
+/// unconditional branch to the destination.
+bool
+AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ unsigned CondBrMBBOperand = 0;
+
+ // The general idea is to add an unconditional branch to the destination and
+ // invert the conditional branch to jump over it. Complications occur around
+ // fallthrough and unreachable ends to the block.
+ // b.lt L1
+ // =>
+ // b.ge L2
+ // b L1
+ // L2:
+
+ // First we invert the conditional branch, by creating a replacement if
+ // necessary. This if statement contains all the special handling of different
+ // branch types.
+ if (MI->getOpcode() == AArch64::Bcc) {
+ // The basic block is operand number 1 for Bcc
+ CondBrMBBOperand = 1;
+
+ A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
+ CC = A64InvertCondCode(CC);
+ MI->getOperand(0).setImm(CC);
+ } else {
+ MachineInstrBuilder InvertedMI;
+ int InvertedOpcode;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown branch type");
+ case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
+ case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
+ case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
+ case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
+ case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
+ case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
+ case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
+ case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
+ }
+
+ InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
+ for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
+ InvertedMI.addOperand(MI->getOperand(i));
+ if (MI->getOperand(i).isMBB())
+ CondBrMBBOperand = i;
+ }
+
+ MI->eraseFromParent();
+ MI = Br.MI = InvertedMI;
+ }
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through
+ // block. Otherwise, split the MBB before the next instruction.
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == AArch64::Bimm) {
+ // Last MI in the BB is an unconditional branch. We can swap destinations:
+ // b.eq L1 (temporarily b.ne L1 after first change)
+ // b L2
+ // =>
+ // b.ne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ MachineBasicBlock::iterator MBBI = MI; ++MBBI;
+ splitBlockBeforeInstr(MBBI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->getInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+
+ // After splitting and removing the unconditional branch from the original BB,
+ // the structure is now:
+ // oldbb:
+ // [things]
+ // b.invertedCC L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ //
+ // We now have to change the conditional branch to point to splitbb and add an
+ // unconditional branch after it to L1, giving the final structure:
+ // oldbb:
+ // [things]
+ // b.invertedCC splitbb
+ // b L1
+ // splitbb/fallthroughbb:
+ // [old b L2/real continuation]
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#"
+ << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new unconditional branch and fix up the destination of the
+ // conditional one. Also update the ImmBranch and add a new entry for the
+ // new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
+ .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
+ MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
+
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+
+ // 26 bits written down in Bimm, specifying a multiple of 4.
+ unsigned OffsetBits = 26 + 2;
+ ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
+
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
new file mode 100644
index 0000000..b880d83
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -0,0 +1,196 @@
+//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for AArch64 architecture.
+//===----------------------------------------------------------------------===//
+
+
+// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
+// higher level of abstraction than LLVM's target interface presents. In
+// particular, it refers (like other ABIs, in fact) directly to
+// structs. However, generic LLVM code takes the liberty of lowering structure
+// arguments to the component fields before we see them.
+//
+// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
+// implemented, so the goals of this calling convention are, in decreasing
+// priority order:
+// 1. Expose *some* way to express the concepts required to implement the
+// generic PCS from a front-end.
+// 2. Provide a sane ABI for pure LLVM.
+// 3. Follow the generic PCS as closely as is naturally possible.
+//
+// The suggested front-end implementation of PCS features is:
+// * Integer, float and vector arguments of all sizes which end up in
+// registers are passed and returned via the natural LLVM type.
+// * Structure arguments with size <= 16 bytes are passed and returned in
+// registers as similar integer or composite types. For example:
+// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
+// * HFAs in registers follow rules similar to small structs: appropriate
+// composite types.
+// * Structure arguments with size > 16 bytes are passed via a pointer,
+// handled completely by the front-end.
+// * Structure return values > 16 bytes via an sret pointer argument.
+// * Other stack-based arguments (not large structs) are passed using byval
+// pointers. Padding arguments are added beforehand to guarantee a large
+// struct doesn't later use integer registers.
+//
+// N.b. this means that it is the front-end's responsibility (if it cares about
+// PCS compliance) to check whether enough registers are available for an
+// argument when deciding how to pass it.
+
+class CCIfAlign<int Align, CCAction A>:
+ CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+
+def CC_A64_APCS : CallingConv<[
+ // SRet is an LLVM-specific concept, so it takes precedence over general ABI
+ // concerns. However, this rule will be used by C/C++ frontends to implement
+ // structure return.
+ CCIfSRet<CCAssignToReg<[X8]>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Canonicalise the various types that live in different floating-point
+ // registers. This makes sense because the PCS does not distinguish Short
+ // Vectors and Floating-point types.
+ CCIfType<[v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCBitConvertToType<f128>>,
+
+ // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
+ // Floating-point or Short Vector Type and the NSRN is less than 8, then the
+ // argument is allocated to the least significant bits of register
+ // v[NSRN]. The NSRN is incremented by one. The argument has now been
+ // allocated."
+ CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+ // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
+ // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
+ // argument is allocated to SIMD and Floating-point registers (with one
+ // register per element of the HFA). The NSRN is incremented by the number of
+ // registers used. The argument has now been allocated."
+ //
+ // N.b. As above, this rule is the responsibility of the front-end.
+
+ // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
+ // the argument is rounded up to the nearest multiple of 8 bytes."
+ //
+ // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
+ // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the Argument's type."
+ //
+ // It is expected that these will be satisfied by adding dummy arguments to
+ // the prototype.
+
+ // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
+ // type then the size of the argument is set to 8 bytes. The effect is as if
+ // the argument had been copied to the least significant bits of a 64-bit
+ // register and the remaining bits filled with unspecified values."
+ CCIfType<[f16, f32], CCPromoteToType<f64>>,
+
+ // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
+ // precision Floating-point or Short Vector Type, then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+ CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+ // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
+ // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
+ // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
+ // one. The argument has now been allocated."
+
+ // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
+ // represented as two i64s, the first one being split. If we delayed this
+ // operation C.8 would never be reached.
+ CCIfType<[i64],
+ CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
+
+ // Note: the promotion also implements C.14.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // And now the real implementation of C.7
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+
+ // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
+ // up to the next even number."
+ //
+ // "C.9: If the argument is an Integral Type, the size of the argument is
+ // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+ // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
+ // memory representation of the argument. The NGRN is incremented by two. The
+ // argument has now been allocated."
+ //
+ // Subtlety here: what if alignment is 16 but it is not an integral type? All
+ // floating-point types have been allocated already, which leaves composite
+ // types: this is why a front-end may need to produce i128 for a struct <= 16
+ // bytes.
+
+ // PCS: "C.10 If the argument is a Composite Type and the size in double-words
+ // of the argument is not more than 8 minus NGRN, then the argument is copied
+ // into consecutive general-purpose registers, starting at x[NGRN]. The
+ // argument is passed as though it had been loaded into the registers from a
+ // double-word aligned address with an appropriate sequence of LDR
+ // instructions loading consecutive registers from memory (the contents of any
+ // unused parts of the registers are unspecified by this standard). The NGRN
+ // is incremented by the number of registers used. The argument has now been
+ // allocated."
+ //
+ // Another one that's the responsibility of the front-end (sigh).
+
+ // PCS: "C.11: The NGRN is set to 8."
+ CCCustom<"CC_AArch64NoMoreRegs">,
+
+ // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
+ // Alignment of the argument's type."
+ //
+ // PCS: "C.13: If the argument is a composite type then the argument is copied
+ // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
+ // argument. The argument has now been allocated."
+ //
+ // Note that the effect of this corresponds to a memcpy rather than register
+ // stores so that the struct ends up correctly addressable at the adjusted
+ // NSAA.
+
+ // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
+ // of the argument is set to 8 bytes. The effect is as if the argument was
+ // copied to the least significant bits of a 64-bit register and the remaining
+ // bits filled with unspecified values."
+ //
+ // Integer types were widened above. Floating-point and composite types have
+ // already been allocated completely. Nothing to do.
+
+ // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
+ // is incremented by the size of the argument. The argument has now been
+ // allocated."
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCAssignToStack<8, 8>>
+
+]>;
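+
+// A sketch of how the rules above compose (an editorial illustration, not a
+// normative statement of the PCS): for a call such as
+//   declare void @f(i32 %a, i128 %b)
+// %a is promoted to i64 and lands in x0; the first (split) half of %b is
+// matched by the CCIfSplit rule, which skips to the even register x2 and marks
+// the shadow register x1 as used; the second half then falls through to the
+// plain CCAssignToReg rule and receives x3. The net effect is the even/odd
+// pair required by C.8/C.9, with x1 left unused.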
+
+// According to the PCS, X19-X30 are callee-saved; however, only the low 64 bits
+// of the vector registers (v8-v15) are callee-saved. The order here is picked
+// up by PrologEpilogInserter.cpp to allocate stack slots, starting from the top
+// of the stack upon entry. This gives the customary layout of x30 at [sp-8],
+// x29 at [sp-16], ...
+def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
+ (sequence "D%u", 15, 8))>;
+
+
+// TLS descriptor calls are extremely restricted in their changes, to allow
+// optimisations in the (hopefully) more common fast path where no real action
+// is needed. They actually have to preserve all registers, except for the
+// unavoidable X30 and the return register X0.
+def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
+ (sequence "Q%u", 31, 0))>;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
new file mode 100644
index 0000000..cca6d12
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -0,0 +1,686 @@
+//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
+ uint64_t &Initial,
+ uint64_t &Residual) const {
+ // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
+ // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
+ // 0x1f8, but stack adjustment should always be a multiple of 16.
+ if (Total <= 0x1f0) {
+ Initial = Total;
+ Residual = 0;
+ } else {
+ Initial = 0x1f0;
+ Residual = Total - Initial;
+ }
+}
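+
+// For illustration: splitSPAdjustments(0x230, ...) yields Initial == 0x1f0 and
+// Residual == 0x40, while any total of 0x1f0 or less (e.g. 0x180) is handled
+// entirely by the initial adjustment, leaving Residual == 0.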
+
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ bool NeedsFrameMoves = MMI.hasDebugInfo()
+ || MF.getFunction()->needsUnwindTableEntry();
+
+ uint64_t NumInitialBytes, NumResidualBytes;
+
+ // Currently we expect the stack to be laid out by
+ // sub sp, sp, #initial
+ // stp x29, x30, [sp, #offset]
+ // ...
+ // str xxx, [sp, #offset]
+ // sub sp, sp, #rest (possibly via extra instructions).
+ if (MFI->getCalleeSavedInfo().size()) {
+ // If there are callee-saved registers, we want to store them efficiently as
+ // a block, and virtual base assignment happens too early to do it for us so
+ // we adjust the stack in two phases: first just for callee-saved fiddling,
+ // then to allocate the rest of the frame.
+ splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
+ } else {
+ // If there aren't any callee-saved registers, two-phase adjustment is
+ // inefficient. It's more efficient to adjust with NumInitialBytes too
+ // because when we're in a "callee pops argument space" situation, that pop
+ // must be tacked onto Initial for correctness.
+ NumInitialBytes = MFI->getStackSize();
+ NumResidualBytes = 0;
+ }
+
+ // Tell everyone else how much adjustment we're expecting them to use. In
+ // particular if an adjustment is required for a tail call the epilogue could
+ // have a different view of things.
+ FuncInfo->setInitialStackAdjust(NumInitialBytes);
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
+ MachineInstr::FrameSetup);
+
+ if (NeedsFrameMoves && NumInitialBytes) {
+ // We emit this update even if the CFA is set from a frame pointer later so
+ // that the CFA is valid in the interim.
+ MCSymbol *SPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SPLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumInitialBytes);
+ Moves.push_back(MachineMove(SPLabel, Dst, Src));
+ }
+
+ // Otherwise we need to set the frame pointer and/or add a second stack
+ // adjustment.
+
+ bool FPNeedsSetting = hasFP(MF);
+ for (; MBBI != MBB.end(); ++MBBI) {
+ // Note that this search makes strong assumptions about the operation used
+ // to store the frame pointer: it must be "STP x29, x30, ...". This could
+ // change in future, but until then there's no point in implementing more
+ // general cases that can't be tested.
+ if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
+ && MBBI->getOperand(0).getReg() == AArch64::X29) {
+ int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
+ FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
+
+ ++MBBI;
+ emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
+ AArch64::X29,
+ NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
+ MachineInstr::FrameSetup);
+
+ // The offset adjustment used when emitting debugging locations relative
+ // to whatever frame base is set. AArch64 uses the default frame base (FP
+ // or SP) and this adjusts the calculations to be correct.
+ MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
+ - MFI->getStackSize());
+
+ if (NeedsFrameMoves) {
+ MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPLabel);
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx));
+ Moves.push_back(MachineMove(FPLabel, Dst, Src));
+ }
+
+ FPNeedsSetting = false;
+ }
+
+ if (!MBBI->getFlag(MachineInstr::FrameSetup))
+ break;
+ }
+
+ assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
+ MachineInstr::FrameSetup);
+
+ // Now we emit the rest of the frame setup information, if necessary: we've
+ // already noted the FP and initial SP moves so we're left with the prologue's
+ // final SP update and callee-saved register locations.
+ if (!NeedsFrameMoves)
+ return;
+
+ // Reuse the label if appropriate, so create it in this outer scope.
+ MCSymbol *CSLabel = 0;
+
+ // The rest of the stack adjustment
+ if (!hasFP(MF) && NumResidualBytes) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes);
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+
+ // And any callee-saved registers (it's fine to leave them to the end here,
+ // because the old values are still valid at this point).
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.size()) {
+ if (!CSLabel) {
+ CSLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL))
+ .addSym(CSLabel);
+ }
+
+ for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+ E = CSI.end(); I != E; ++I) {
+ MachineLocation Dst(MachineLocation::VirtualFP,
+ MFI->getObjectOffset(I->getFrameIdx()));
+ MachineLocation Src(I->getReg());
+ Moves.push_back(MachineMove(CSLabel, Dst, Src));
+ }
+ }
+}
+
+void
+AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ // Initial and residual are named for consistency with the prologue. Note that
+ // in the epilogue, the residual adjustment is executed first.
+ uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
+ uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
+ uint64_t ArgumentPopSize = 0;
+ if (RetOpcode == AArch64::TC_RETURNdi ||
+ RetOpcode == AArch64::TC_RETURNxi) {
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+
+ MachineInstrBuilder MIB;
+ if (RetOpcode == AArch64::TC_RETURNdi) {
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
+ if (JumpTarget.isGlobal()) {
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else {
+ assert(JumpTarget.isSymbol() && "unexpected tail call destination");
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else {
+ assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
+ && "Unexpected tail call");
+
+ MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
+ MIB.addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ // Add the extra operands onto the new tail call instruction even though
+ // they're not used directly (so that liveness is tracked properly etc).
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ MIB->addOperand(MBBI->getOperand(i));
+
+
+ // Delete the pseudo instruction TC_RETURN.
+ MachineInstr *NewMI = prior(MBBI);
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+
+ // For a tail-call in a callee-pops-arguments environment, some or all of
+ // the stack may actually be in use for the call's arguments; this is
+ // calculated during LowerCall and consumed here...
+ ArgumentPopSize = StackAdjust.getImm();
+ } else {
+ // ... otherwise the amount to pop is *all* of the argument space,
+ // conveniently stored in the MachineFunctionInfo by
+ // LowerFormalArguments. This will, of course, be zero for the C calling
+ // convention.
+ ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ }
+
+ assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
+ && "refusing to adjust stack by misaligned amt");
+
+ // We may need to address callee-saved registers differently, so find out the
+ // bound on the frame indices.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ // The "residual" stack update comes first from this direction and guarantees
+ // that SP is NumInitialBytes below its value on function entry, either by a
+ // direct update or restoring it from the frame pointer.
+ if (NumInitialBytes + ArgumentPopSize != 0) {
+ emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
+ NumInitialBytes + ArgumentPopSize);
+ --MBBI;
+ }
+
+
+ // MBBI now points to the instruction just past the last callee-saved
+ // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
+ // otherwise).
+
+ // Now we need to find out where to put the bulk of the stack adjustment
+ MachineBasicBlock::iterator FirstEpilogue = MBBI;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+
+ unsigned FrameOp;
+ for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
+ if (MBBI->getOperand(FrameOp).isFI())
+ break;
+ }
+
+ // If this instruction doesn't have a frame index we've reached the end of
+ // the callee-save restoration.
+ if (FrameOp == MBBI->getNumOperands())
+ break;
+
+ // Likewise if it *is* a local reference, but not to a callee-saved object.
+ int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
+ if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
+ break;
+
+ FirstEpilogue = MBBI;
+ }
+
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ int64_t StaticFrameBase;
+ StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
+ emitRegUpdate(MBB, FirstEpilogue, DL, TII,
+ AArch64::XSP, AArch64::X29, AArch64::NoRegister,
+ StaticFrameBase);
+ } else {
+ emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes);
+ }
+}
+
+int64_t
+AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
+ int FrameIndex,
+ unsigned &FrameReg,
+ int SPAdj,
+ bool IsCalleeSaveOp) const {
+ AArch64MachineFunctionInfo *FuncInfo =
+ MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
+
+ assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
+ && "callee-saved register in unexpected place");
+
+ // If the frame for this function is particularly large, we adjust the stack
+ // in two phases which means the callee-save related operations see a
+ // different (intermediate) stack size.
+ int64_t FrameRegPos;
+ if (IsCalleeSaveOp) {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
+ } else if (useFPForAddressing(MF)) {
+ // Have to use the frame pointer since we have no idea where SP is.
+ FrameReg = AArch64::X29;
+ FrameRegPos = FuncInfo->getFramePointerOffset();
+ } else {
+ FrameReg = AArch64::XSP;
+ FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ }
+
+ return TopOfFrameOffset - FrameRegPos;
+}
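+
+// A worked example of the arithmetic above (illustrative only): with no frame
+// pointer, SPAdj == 0, a 0x200-byte frame and a non-callee-save object at
+// offset -16 from the top of the frame, FrameReg is XSP, FrameRegPos is -0x200
+// and the function returns -16 - (-0x200) == 0x1f0, i.e. the object would be
+// addressed as [xsp, #0x1f0].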
+
+/// Estimate and return the size of the frame.
+static unsigned estimateStackSize(MachineFunction &MF) {
+ // FIXME: Make generic? Really consider after upstreaming. This code is now
+ // shared between PEI, ARM *and* here.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void
+AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(AArch64::X29);
+ MF.getRegInfo().setPhysRegUsed(AArch64::X30);
+ }
+
+ // If addressing of local variables is going to be more complicated than
+ // shoving a base register and an offset into the instruction then we may well
+ // need to scavenge registers. We should either specifically add a
+ // callee-saved register for this purpose or allocate an extra spill slot.
+
+ bool BigStack =
+ (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
+ || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
+
+ if (!BigStack)
+ return;
+
+ // We certainly need some slack space for the scavenger, preferably an extra
+ // register.
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ uint16_t ExtraReg = AArch64::NoRegister;
+
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
+ !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
+ ExtraReg = CSRegs[i];
+ break;
+ }
+ }
+
+ if (ExtraReg != 0) {
+ MF.getRegInfo().setPhysRegUsed(ExtraReg);
+ } else {
+ // Create a stack slot for scavenging purposes. PrologEpilogInserter
+ // helpfully places it near either SP or FP for us to avoid an
+ // infinite regress during scavenging.
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
+ unsigned Reg) const {
+ // If @llvm.returnaddress is called then it will refer to X30 by some means;
+ // the prologue store does not kill the register.
+ if (Reg == AArch64::X30) {
+ if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
+ && MBB.getParent()->getRegInfo().isLiveIn(Reg))
+ return false;
+ }
+
+ // In all other cases, physical registers are dead after they've been saved
+ // but live at the beginning of the prologue block.
+ MBB.addLiveIn(Reg);
+ return true;
+}
+
+void
+AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossClasses[],
+ unsigned NumClasses) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // A certain amount of implicit contract is present here. The actual stack
+ // offsets haven't been allocated officially yet, so for strictly correct code
+ // we rely on the fact that the elements of CSI are allocated in order
+ // starting at SP, purely as dictated by size and alignment. In practice since
+ // this function handles the only accesses to those slots it's not quite so
+ // important.
+ //
+ // We have also ordered the Callee-saved register list in AArch64CallingConv
+ // so that the above scheme puts registers in order: in particular we want
+ // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
+ for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // First we need to find out which register class the register belongs to so
+ // that we can use the correct load/store instructions.
+ unsigned ClassIdx;
+ for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
+ if (PossClasses[ClassIdx].RegClass->contains(Reg))
+ break;
+ }
+ assert(ClassIdx != NumClasses
+ && "Asked to store register in unexpected class");
+ const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
+
+ // Now we need to decide whether it's possible to emit a paired instruction:
+ // for this we want the next register to be in the same class.
+ MachineInstrBuilder NewMI;
+ bool Pair = false;
+ if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
+ Pair = true;
+ unsigned StLow = 0, StHigh = 0;
+ if (isPrologue) {
+ // Most of these registers will be live-in to the MBB and killed by our
+ // store, though there are exceptions (see determinePrologueDeath).
+ StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
+ StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ StLow = RegState::Define;
+ StHigh = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
+ .addReg(CSI[i+1].getReg(), StLow)
+ .addReg(CSI[i].getReg(), StHigh);
+
+ // If it's a paired op, we've consumed two registers
+ ++i;
+ } else {
+ unsigned State;
+ if (isPrologue) {
+ State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ } else {
+ State = RegState::Define;
+ }
+
+ NewMI = BuildMI(MBB, MBBI, DL,
+ TII.get(PossClasses[ClassIdx].SingleOpcode))
+ .addReg(CSI[i].getReg(), State);
+ }
+
+ // Note that the FrameIdx refers to the second register in a pair: it will
+ // be allocated the smaller numeric address and so is the one an LDP/STP
+ // address must use.
+ int FrameIdx = CSI[i].getFrameIdx();
+ MachineMemOperand::MemOperandFlags Flags;
+ Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ Flags,
+ Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
+ MFI.getObjectAlignment(FrameIdx));
+
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0) // address-register offset
+ .addMemOperand(MMO);
+
+ if (isPrologue)
+ NewMI.setMIFlags(MachineInstr::FrameSetup);
+
+ // For aesthetic reasons, during an epilogue we want to emit complementary
+ // operations to the prologue, but in the opposite order. So we still
+ // iterate through the CalleeSavedInfo list in order, but we put the
+ // instructions successively earlier in the MBB.
+ if (!isPrologue)
+ --MBBI;
+ }
+}
+
+bool
+AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ if (CSI.empty())
+ return false;
+
+ static LoadStoreMethod PossibleClasses[] = {
+ {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
+ {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
+ };
+ unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
+
+ emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
+ PossibleClasses, NumClasses);
+
+ return true;
+}
+
+bool
+AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // This is a decision of ABI compliance. The AArch64 PCS gives various options
+ // for conformance, and even at the most stringent level more or less permits
+ // elimination for leaf functions because there's no loss of functionality
+ // (for debugging etc.).
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
+ return true;
+
+ // The following are hard-limits: incorrect code will be generated if we try
+ // to omit the frame.
+ return (RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool
+AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Of the various reasons for having a frame pointer, it's actually only
+ // variable-sized objects that prevent reservation of a call frame.
+ return !(hasFP(MF) && MFI->hasVarSizedObjects());
+}
+
+void
+AArch64FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const AArch64InstrInfo &TII =
+ *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MI->getDebugLoc();
+ int Opcode = MI->getOpcode();
+ bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
+
+ if (!hasReservedCallFrame(MF)) {
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = MI->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy) Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 12-bits
+ // because there's no guaranteed temporary register available. Mostly call
+ // frames will be allocated at the start of a function so this is OK, but
+ // it is a limitation that needs dealing with.
+ assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xfff && "call frame too large");
+ emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
+ }
+
+ MBB.erase(MI);
+}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
new file mode 100644
index 0000000..45ea0ec
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -0,0 +1,108 @@
+//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the AArch64-specific parts of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_FRAMEINFO_H
+#define LLVM_AARCH64_FRAMEINFO_H
+
+#include "AArch64Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class AArch64Subtarget;
+
+class AArch64FrameLowering : public TargetFrameLowering {
+private:
+ // In order to unify the spilling and restoring of callee-saved registers into
+ // emitFrameMemOps, we need to be able to specify which instructions to use
+ // for the relevant memory operations on each register class. An array of the
+ // following struct is populated and passed in to achieve this.
+ struct LoadStoreMethod {
+ const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
+
+ // The preferred instruction.
+ unsigned PairOpcode; // E.g. LSPair64_STR
+
+ // Sometimes only a single register can be handled at once.
+ unsigned SingleOpcode; // E.g. LS64_STR
+ };
+protected:
+ const AArch64Subtarget &STI;
+
+public:
+ explicit AArch64FrameLowering(const AArch64Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Decides how much stack adjustment to perform in each phase of the prologue
+ /// and epilogue.
+ void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
+ uint64_t &Residual) const;
+
+ int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
+ unsigned &FrameReg, int SPAdj,
+ bool IsCalleeSaveOp) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// If the register is X30 (i.e. LR) and the return address is used in the
+ /// function then the callee-save store doesn't actually kill the register,
+ /// otherwise it does.
+ bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
+
+ /// This function emits the loads or stores required during prologue and
+ /// epilogue as efficiently as possible.
+ ///
+ /// The operations involved in setting up and tearing down the frame are
+ /// similar enough to warrant a shared function, particularly as discrepancies
+ /// between the two would be disastrous.
+ void emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI,
+ LoadStoreMethod PossibleClasses[],
+ unsigned NumClasses) const;
+
+
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual bool useFPForAddressing(const MachineFunction &MF) const;
+
+ /// On AArch64 a call frame can be reserved unless there are variable-sized
+ /// objects on the stack (see hasReservedCallFrame in the .cpp file).
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
new file mode 100644
index 0000000..46b8221
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -0,0 +1,415 @@
+//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AArch64 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// AArch64 specific code to select AArch64 machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+
+class AArch64DAGToDAGISel : public SelectionDAGISel {
+ AArch64TargetMachine &TM;
+ const AArch64InstrInfo *TII;
+
+ /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+public:
+ explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel), TM(tm),
+ TII(static_cast<const AArch64InstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "AArch64 Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
+ template<unsigned MemSize>
+ bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN || CN->getZExtValue() % MemSize != 0
+ || CN->getZExtValue() / MemSize > 0xfff)
+ return false;
+
+ UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
+ return true;
+ }
+
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
+
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ bool SelectLogicalImm(SDValue N, SDValue &Imm);
+
+ template<unsigned RegWidth>
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
+ return SelectTSTBOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+
+ SDNode *TrySelectToMoveImm(SDNode *N);
+ SDNode *LowerToFPLitPool(SDNode *Node);
+ SDNode *SelectToLitPool(SDNode *N);
+
+ SDNode* Select(SDNode*);
+private:
+};
+}
+
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (!CN) return false;
+
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
+ bool IsExact;
+
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ return true;
+}
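+
+// Tracing the code above on a concrete input: for (fp_to_sint (fmul x,
+// 65536.0)) with RegWidth == 32, the value converts exactly to 2^16, so
+// FBits == 16 and FixedPos becomes the constant 64 - 16 == 48 that the
+// fixed-point conversion patterns then consume.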
+
+bool
+AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ switch (ConstraintCode) {
+ default: llvm_unreachable("Unrecognised AArch64 memory constraint");
+ case 'm':
+ // FIXME: more freedom is actually permitted for 'm'. We can go
+ // hunting for a base and an offset if we want. Of course, since
+ // we don't really know how the operand is going to be used we're
+ // probably restricted to the load/store pair's simm7 as an offset
+ // range anyway.
+ case 'Q':
+ OutOps.push_back(Op);
+ }
+
+ return false;
+}
+
+bool
+AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
+ ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
+ if (!Imm || !Imm->getValueAPF().isPosZero())
+ return false;
+
+ // Doesn't actually carry any information, but keeps TableGen quiet.
+ Dummy = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
+ uint32_t Bits;
+ uint32_t RegWidth = N.getValueType().getSizeInBits();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ return true;
+}
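+
+// Editorial note on what isLogicalImm accepts (the encoding itself lives in
+// A64Imms): A64 logical immediates are "bitmask immediates", a replicated,
+// rotated run of set bits. An AND of a w-register with 0xff can therefore be
+// selected through this predicate, whereas a value such as 0xff1 (two separate
+// runs of ones) is rejected and the constant has to be materialised into a
+// register first.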
+
+SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
+ SDNode *ResNode;
+ DebugLoc dl = Node->getDebugLoc();
+ EVT DestType = Node->getValueType(0);
+ unsigned DestWidth = DestType.getSizeInBits();
+
+ unsigned MOVOpcode;
+ EVT MOVType;
+ int UImm16, Shift;
+ uint32_t LogicalBits;
+
+ uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
+ } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
+ MOVType = DestType;
+ MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
+ } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
+ // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
+ // use a 32-bit instruction: "movn w0, #0xedcb".
+ MOVType = MVT::i32;
+ MOVOpcode = AArch64::MOVNwii;
+ } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
+ MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
+ uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
+
+ return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
+ CurDAG->getRegister(ZR, DestType),
+ CurDAG->getTargetConstant(LogicalBits, MVT::i32));
+ } else {
+ // Can't handle it in one instruction. There's scope for permitting two (or
+ // more) instructions, but that'll need more thought.
+ return NULL;
+ }
+
+ ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
+ CurDAG->getTargetConstant(UImm16, MVT::i32),
+ CurDAG->getTargetConstant(Shift, MVT::i32));
+
+ if (MOVType != DestType) {
+ ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
+ MVT::i64, MVT::i32, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ SDValue(ResNode, 0),
+ CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ }
+
+ return ResNode;
+}
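+
+// By way of illustration (restating standard A64 encodings rather than
+// anything specific to this patch): 0x12340000 is a single 16-bit chunk
+// shifted left by 16, so isMOVZImm matches and we emit "movz w0, #0x1234,
+// lsl #16"; for -5 (0xfffffffffffffffb) the bitwise complement 0x4 fits in 16
+// bits, so isMOVNImm matches and we emit "movn x0, #0x4".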
+
+SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
+ int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
+ EVT DestType = Node->getValueType(0);
+ EVT PtrVT = TLI.getPointerTy();
+
+ // Since we may end up loading a 64-bit constant from a 32-bit entry the
+ // constant in the pool may have a different type to the eventual node.
+ ISD::LoadExtType Extension;
+ EVT MemType;
+
+ assert((DestType == MVT::i64 || DestType == MVT::i32)
+ && "Only expect integer constants at the moment");
+
+ if (DestType == MVT::i32) {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i32;
+ } else if (UnsignedVal <= UINT32_MAX) {
+ Extension = ISD::ZEXTLOAD;
+ MemType = MVT::i32;
+ } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
+ Extension = ISD::SEXTLOAD;
+ MemType = MVT::i32;
+ } else {
+ Extension = ISD::NON_EXTLOAD;
+ MemType = MVT::i64;
+ }
+
+ Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
+ MemType.getSizeInBits()),
+ UnsignedVal);
+ SDValue PoolAddr;
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
+ PoolAddr,
+ MachinePointerInfo::getConstantPool(), MemType,
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ Alignment).getNode();
+}
+
+SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
+ DebugLoc DL = Node->getDebugLoc();
+ const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
+ EVT PtrVT = TLI.getPointerTy();
+ EVT DestType = Node->getValueType(0);
+
+ unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
+ SDValue PoolAddr;
+
+ assert(TM.getCodeModel() == CodeModel::Small &&
+ "Only small code model supported");
+ PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+
+ return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /* isVolatile = */ false,
+ /* isNonTemporal = */ false,
+ /* isInvariant = */ true,
+ Alignment).getNode();
+}
+
+bool
+AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) return false;
+
+ uint64_t Val = CN->getZExtValue();
+
+ if (!isPowerOf2_64(Val)) return false;
+
+ unsigned TestedBit = Log2_64(Val);
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding TestedBit, but it must still be in range.
+ if (TestedBit >= RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
+ return true;
+}
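+
+// Example (editorial): when the DAG contains (and x, 0x80) feeding a
+// test-against-zero branch, Val == 0x80 is a power of two and TestedBit == 7
+// is within the register width, so FixedPos becomes the constant 7,
+// presumably used as the bit-number operand of the resulting TBZ/TBNZ.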
+
+SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+
+ if (Node->isMachineOpcode()) {
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ return NULL;
+ }
+
+ switch (Node->getOpcode()) {
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT PtrTy = TLI.getPointerTy();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
+ TFI, CurDAG->getTargetConstant(0, PtrTy));
+ }
+ case ISD::ConstantPool: {
+ // Constant pools are fine, just create a Target entry.
+ ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
+ const Constant *C = CN->getConstVal();
+ SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
+
+ ReplaceUses(SDValue(Node, 0), CP);
+ return NULL;
+ }
+ case ISD::Constant: {
+ SDNode *ResNode = 0;
+ if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
+ // XZR and WZR are probably even better than an actual move: most of the
+ // time they can be folded into another instruction with *no* cost.
+
+ EVT Ty = Node->getValueType(0);
+ assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
+ uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
+ ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ Node->getDebugLoc(),
+ Register, Ty).getNode();
+ }
+
+ // Next best option is a move-immediate, see if we can do that.
+ if (!ResNode) {
+ ResNode = TrySelectToMoveImm(Node);
+ }
+
+ if (ResNode)
+ return ResNode;
+
+ // If even that fails we fall back to a lit-pool entry at the moment. Future
+ // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
+ ResNode = SelectToLitPool(Node);
+ assert(ResNode && "We need *some* way to materialise a constant");
+
+ // We want to continue selection at this point since the litpool access we
+ // generated uses generic nodes for simplicity.
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ Node = ResNode;
+ break;
+ }
+ case ISD::ConstantFP: {
+ if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
+ // FMOV will take care of it from TableGen
+ break;
+ }
+
+ SDNode *ResNode = LowerToFPLitPool(Node);
+ ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+
+ // We want to continue selection at this point since the litpool access we
+ // generated uses generic nodes for simplicity.
+ Node = ResNode;
+ break;
+ }
+ default:
+ break; // Let generic code handle it
+ }
+
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << "\n");
+
+ return ResNode;
+}
+
+/// This pass converts a legalized DAG into an AArch64-specific DAG, ready for
+/// instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AArch64DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
new file mode 100644
index 0000000..cea7f91
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -0,0 +1,2976 @@
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-isel"
+#include "AArch64.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64TargetObjectFile.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+
+using namespace llvm;
+
+static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
+ const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+ if (Subtarget->isTargetLinux())
+ return new AArch64LinuxTargetObjectFile();
+ if (Subtarget->isTargetELF())
+ return new TargetLoweringObjectFileELF();
+ llvm_unreachable("unknown subtarget type");
+}
+
+
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ RegInfo(TM.getRegisterInfo()),
+ Itins(TM.getInstrItineraryData()) {
+
+ // SIMD compares set the entire lane's bits to 1
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // Scalar register <-> type mapping
+ addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ computeRegisterProperties();
+
+ // Some atomic operations can be folded into load-acquire or store-release
+ // instructions on AArch64. It's marginally simpler to let LLVM expand
+ // everything out to a barrier and then recombine the (few) barriers we can.
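+ // The idea, roughly, is that a barrier-then-store pair such as
+ //   dmb ish
+ //   str w1, [x0]
+ // can in suitable circumstances be turned back into a single store-release:
+ //   stlr w1, [x0]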
+ setInsertFencesForAtomic(true);
+ setTargetDAGCombine(ISD::ATOMIC_FENCE);
+ setTargetDAGCombine(ISD::ATOMIC_STORE);
+
+ // We combine OR nodes for bitfield and NEON BSL operations.
+ setTargetDAGCombine(ISD::OR);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SRA);
+
+ // AArch64 does not have i1 loads, or much of anything for i1 really.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+
+ setStackPointerRegisterToSaveRestore(AArch64::XSP);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+
+ // We'll lower globals to wrappers for selection.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ // A64 instructions have the comparison predicate attached to the user of the
+ // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // Legal floating-point operations.
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
+
+ // Illegal floating-point operations.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f64, Expand);
+
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+
+ // Virtually no operation on f128 is legal, but LLVM can't expand them when
+ // there's a valid register class, so we need custom operations in most cases.
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+
+ // Lowering for many of the conversions is actually specified by the non-f128
+ // type. The LowerXXX function will be trivial when f128 isn't involved.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+
+ // This prevents LLVM trying to compress double constants into a floating
+ // constant-pool entry and trying to load from there. It's of doubtful benefit
+ // for A64: we'd need LDR followed by FCVT, I believe.
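+ // That is, an extending load of a compressed f32 constant would come out
+ // as something like
+ //   ldr s0, [x0]    // load the f32 form
+ //   fcvt d0, s0     // widen back to f64
+ // instead of a single "ldr d0, [x0]".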
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+
+ setExceptionPointerRegister(AArch64::X0);
+ setExceptionSelectorRegister(AArch64::X1);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+ // It's reasonably important that this value matches the "natural" legal
+ // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
+ // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
+ if (!VT.isVector()) return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
+ unsigned &strOpc) {
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for atomic binary op!");
+ case 1:
+ ldrOpc = AArch64::LDXR_byte;
+ strOpc = AArch64::STXR_byte;
+ break;
+ case 2:
+ ldrOpc = AArch64::LDXR_hword;
+ strOpc = AArch64::STXR_hword;
+ break;
+ case 4:
+ ldrOpc = AArch64::LDXR_word;
+ strOpc = AArch64::STXR_word;
+ break;
+ case 8:
+ ldrOpc = AArch64::LDXR_dword;
+ strOpc = AArch64::STXR_dword;
+ break;
+ }
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ const TargetRegisterClass *TRC
+ = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // <binop> scratch, dest, incr
+ // stxr stxr_status, scratch, ptr
+ // cmp stxr_status, #0
+ // b.ne loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ if (BinOpcode) {
+ // All arithmetic operations we'll be creating are designed to take an extra
+ // shift or extend operand, which we can conveniently set to zero.
+
+ // Operand order needs to go the other way for NAND.
+ if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(incr).addReg(dest).addImm(0);
+ else
+ BuildMI(BB, dl, TII->get(BinOpcode), scratch)
+ .addReg(dest).addReg(incr).addImm(0);
+ }
+
+ // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned CmpOp,
+ A64CC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRC, *TRCsp;
+ if (Size == 8) {
+ TRC = &AArch64::GPR64RegClass;
+ TRCsp = &AArch64::GPR64xspRegClass;
+ } else {
+ TRC = &AArch64::GPR32RegClass;
+ TRCsp = &AArch64::GPR32wspRegClass;
+ }
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ MRI.constrainRegClass(scratch, TRCsp);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldxr dest, ptr
+ // cmp incr, dest (, sign extend if necessary)
+ // csel scratch, dest, incr, cond
+ // stxr stxr_status, scratch, ptr
+ // cmp stxr_status, #0
+ // b.ne loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ // Build compare and cmov instructions.
+ MRI.constrainRegClass(incr, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(incr).addReg(oldval).addImm(0);
+
+ BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
+ scratch)
+ .addReg(oldval).addReg(incr).addImm(Cond);
+
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status)
+ .addReg(scratch).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loopMBB);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *TRCsp;
+ TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
+
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(Size, ldrOpc, strOpc);
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldxr dest, [ptr]
+ // cmp dest, oldval
+ // b.ne exitMBB
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+
+ unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
+ MRI.constrainRegClass(dest, TRCsp);
+ BuildMI(BB, dl, TII->get(CmpOp))
+ .addReg(dest).addReg(oldval).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(exitMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex stxr_status, newval, [ptr]
+ // cmp stxr_status, #0
+ // b.ne loop1MBB
+ BB = loop2MBB;
+ unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+
+ BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
+ BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
+ .addReg(stxr_status).addImm(0);
+ BuildMI(BB, dl, TII->get(AArch64::Bcc))
+ .addImm(A64CC::NE).addMBB(loop1MBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // We materialise the F128CSEL pseudo-instruction using conditional branches
+ // and loads, giving an instruction sequence like:
+ // str q0, [sp]
+ // b.ne IfTrue
+ // b Finish
+ // IfTrue:
+ // str q1, [sp]
+ // Finish:
+ // ldr q0, [sp]
+ //
+ // Using virtual registers would probably not be beneficial since COPY
+ // instructions are expensive for f128 (there's no actual instruction to
+ // implement them).
+ //
+ // An alternative would be to do an integer-CSEL on some address. E.g.:
+ // mov x0, sp
+ // add x1, sp, #16
+ // str q0, [x0]
+ // str q1, [x1]
+ // csel x0, x0, x1, ne
+ // ldr q0, [x0]
+ //
+ // It's unclear which approach is actually optimal.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned IfTrueReg = MI->getOperand(1).getReg();
+ unsigned IfFalseReg = MI->getOperand(2).getReg();
+ unsigned CondCode = MI->getOperand(3).getImm();
+ bool NZCVKilled = MI->getOperand(4).isKill();
+
+ MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBB);
+ MF->insert(It, EndBB);
+
+ // Transfer rest of current basic-block to EndBB
+ EndBB->splice(EndBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // We need somewhere to store the f128 value needed.
+ int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
+
+ // [... start of incoming MBB ...]
+ // str qIFFALSE, [sp]
+ // b.cc IfTrue
+ // b Done
+ BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfFalseReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc))
+ .addImm(CondCode)
+ .addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bimm))
+ .addMBB(EndBB);
+ MBB->addSuccessor(TrueBB);
+ MBB->addSuccessor(EndBB);
+
+ // IfTrue:
+ // str qIFTRUE, [sp]
+ BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
+ .addReg(IfTrueReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
+ // blocks.
+ TrueBB->addSuccessor(EndBB);
+
+ // Done:
+ // ldr qDEST, [sp]
+ // [... rest of incoming MBB ...]
+ if (!NZCVKilled)
+ EndBB->addLiveIn(AArch64::NZCV);
+ MachineInstr *StartOfEnd = EndBB->begin();
+ BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
+ .addFrameIndex(ScratchFI)
+ .addImm(0);
+
+ MI->eraseFromParent();
+ return EndBB;
+}
+
+MachineBasicBlock *
+AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unhandled instruction with custom inserter");
+ case AArch64::F128CSEL:
+ return EmitF128CSEL(MI, MBB);
+ case AArch64::ATOMIC_LOAD_ADD_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
+ case AArch64::ATOMIC_LOAD_ADD_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_SUB_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
+ case AArch64::ATOMIC_LOAD_SUB_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_AND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
+ case AArch64::ATOMIC_LOAD_AND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_OR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
+ case AArch64::ATOMIC_LOAD_OR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_XOR_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
+ case AArch64::ATOMIC_LOAD_XOR_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_NAND_I8:
+ return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I16:
+ return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I32:
+ return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
+ case AArch64::ATOMIC_LOAD_NAND_I64:
+ return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
+
+ case AArch64::ATOMIC_LOAD_MIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
+ case AArch64::ATOMIC_LOAD_MIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
+
+ case AArch64::ATOMIC_LOAD_MAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
+ case AArch64::ATOMIC_LOAD_MAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
+
+ case AArch64::ATOMIC_LOAD_UMIN_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
+ case AArch64::ATOMIC_LOAD_UMIN_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
+
+ case AArch64::ATOMIC_LOAD_UMAX_I8:
+ return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I16:
+ return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I32:
+ return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
+ case AArch64::ATOMIC_LOAD_UMAX_I64:
+ return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
+
+ case AArch64::ATOMIC_SWAP_I8:
+ return emitAtomicBinary(MI, MBB, 1, 0);
+ case AArch64::ATOMIC_SWAP_I16:
+ return emitAtomicBinary(MI, MBB, 2, 0);
+ case AArch64::ATOMIC_SWAP_I32:
+ return emitAtomicBinary(MI, MBB, 4, 0);
+ case AArch64::ATOMIC_SWAP_I64:
+ return emitAtomicBinary(MI, MBB, 8, 0);
+
+ case AArch64::ATOMIC_CMP_SWAP_I8:
+ return emitAtomicCmpSwap(MI, MBB, 1);
+ case AArch64::ATOMIC_CMP_SWAP_I16:
+ return emitAtomicCmpSwap(MI, MBB, 2);
+ case AArch64::ATOMIC_CMP_SWAP_I32:
+ return emitAtomicCmpSwap(MI, MBB, 4);
+ case AArch64::ATOMIC_CMP_SWAP_I64:
+ return emitAtomicCmpSwap(MI, MBB, 8);
+ }
+}
+
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
+ case AArch64ISD::Call: return "AArch64ISD::Call";
+ case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
+ case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
+ case AArch64ISD::BFI: return "AArch64ISD::BFI";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::Ret: return "AArch64ISD::Ret";
+ case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
+ case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
+ case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
+
+ default: return NULL;
+ }
+}
+
+static const uint16_t AArch64FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
+};
+static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+
+static const uint16_t AArch64ArgRegs[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
+ AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
+};
+static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+
+static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ // Mark all remaining general purpose registers as allocated. We don't
+ // backtrack: if (for example) an i128 gets put on the stack, no subsequent
+ // i64 will go in registers (C.11).
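+ // As a sketch (types purely illustrative): in a call such as
+ //   f(i64 %a, ..., i64 %g, i128 %h, i64 %i)
+ // %a-%g occupy x0-x6, %h no longer fits in the remaining registers and is
+ // placed on the stack, and this hook then makes sure %i follows it onto the
+ // stack rather than back-filling x7.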
+ for (unsigned i = 0; i < NumArgRegs; ++i)
+ State.AllocateReg(AArch64ArgRegs[i]);
+
+ return false;
+}
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+
+ switch(CC) {
+ default: llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return CC_A64_APCS;
+ }
+}
+
+void
+AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ SmallVector<SDValue, 8> MemOps;
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
+ NumArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
+ NumFPRArgRegs);
+
+ unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
+ }
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+
+ int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
+
+ FuncInfo->setVariadicStackIdx(StackIdx);
+ FuncInfo->setVariadicGPRIdx(GPRIdx);
+ FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
+
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+ }
+}
+
+
+SDValue
+AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ SDValue ArgValue;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Byval is used for small structs and HFAs in the PCS, but the system
+ // should work in a non-compliant manner for larger structs.
+ EVT PtrTy = getPointerTy();
+ int Size = Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
+
+ unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
+ VA.getLocMemOffset(),
+ false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
+
+ continue;
+ } else if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ } else { // VA.isRegLoc()
+ assert(VA.isMemLoc());
+
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+
+
+ }
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned DestSize = VA.getValVT().getSizeInBits();
+ unsigned DestSubReg;
+
+ switch (DestSize) {
+ case 8: DestSubReg = AArch64::sub_8; break;
+ case 16: DestSubReg = AArch64::sub_16; break;
+ case 32: DestSubReg = AArch64::sub_32; break;
+ case 64: DestSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+ VA.getValVT(), ArgValue,
+ DAG.getTargetConstant(DestSubReg, MVT::i32)),
+ 0);
+ break;
+ }
+ }
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (isVarArg)
+ SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
+ }
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
+
+ return Chain;
+}
+
+SDValue
+AArch64TargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ // PCS: "If the type, T, of the result of a function is such that
+ // void func(T arg) would require that arg be passed as a value in a
+ // register (or set of registers) according to the rules in 5.4, then the
+ // result is returned in the same registers as would be used for such an
+ // argument.
+ //
+ // Otherwise, the caller shall reserve a block of memory of sufficient
+ // size and alignment to hold the result. The address of the memory block
+ // shall be passed as an additional argument to the function in x8."
+ //
+ // This is implemented in two places. The register-return values are dealt
+ // with here, more complex returns are passed as an sret parameter, which
+ // means we don't have to worry about it during actual return.
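+ // For instance, a function returning a 32-byte struct is lowered so that
+ // the caller provides a result pointer (nominally in x8) and the callee
+ // just stores through it; nothing special is needed in LowerReturn for
+ // that case.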
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+
+
+ SDValue Arg = OutVals[i];
+
+ // There's no convenient note in the ABI about this as there is for normal
+ // arguments, but it says return values are passed in the same registers as
+ // an argument would be. I believe that includes the comments about
+ // unspecified higher bits, putting the burden of widening on the *caller*
+ // for return values.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt:
+ // Floating-point values should only be extended when they're going into
+ // memory, which can't happen here so an integer extend is acceptable.
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+
+ if (IsTailCall) {
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
+ }
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // On AArch64 (and all other architectures I'm aware of) the most this has to
+ // do is adjust the stack pointer.
+ unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
+ }
+
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
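+ // As a rough example: if this function came in with 32 bytes of stack
+ // arguments and the tail call only needs 16, FPDiff below works out to
+ // +16; if the tail call needed 48 bytes instead, FPDiff would be -16 and
+ // the epilogue has to create the extra room before branching.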
+ int FPDiff = 0;
+
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
+ getPointerTy());
+
+ SmallVector<SDValue, 8> MemOpChains;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[i];
+
+ // Callee does the actual widening, so all extensions just use an implicit
+ // definition of the rest of the Loc. Aesthetically, this would be nicer as
+ // an ANY_EXTEND, but that isn't valid for floating-point types and this
+ // alternative works on integer types too.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ case CCValAssign::AExt: {
+ unsigned SrcSize = VA.getValVT().getSizeInBits();
+ unsigned SrcSubReg;
+
+ switch (SrcSize) {
+ case 8: SrcSubReg = AArch64::sub_8; break;
+ case 16: SrcSubReg = AArch64::sub_16; break;
+ case 32: SrcSubReg = AArch64::sub_32; break;
+ case 64: SrcSubReg = AArch64::sub_64; break;
+ default: llvm_unreachable("Unexpected argument promotion");
+ }
+
+ Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ VA.getLocVT(),
+ DAG.getUNDEF(VA.getLocVT()),
+ Arg,
+ DAG.getTargetConstant(SrcSubReg, MVT::i32)),
+ 0);
+
+ break;
+ }
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // A normal register (sub-) argument. For now we just note it down because
+ // we want to copy things into registers as late as possible to avoid
+ // register-pressure (and possibly worse).
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc() && "unexpected argument location");
+
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
+ if (IsTailCall) {
+ uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
+ VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ int32_t Offset = VA.getLocMemOffset() + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing are
+ // loaded before this eventual operation. Otherwise they'll be clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+
+ DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
+ }
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false,
+ DstInfo, MachinePointerInfo(0));
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Normal stack argument, put it where it's needed.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
+ false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+
+ // The loads and stores generated above shouldn't clash with each
+ // other. Combining them with this TokenFactor notes that fact for the rest of
+ // the backend.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Most of the rest of the instructions need to be glued together; we don't
+ // want assignments to actual registers used by a call to be rearranged by a
+ // well-meaning scheduler.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // The linker is responsible for inserting veneers when necessary to put a
+ // function call destination in range, so we don't need to bother with a
+ // wrapper here.
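+ // (A veneer is just a linker-generated stub, roughly:
+ //   veneer_for_callee: adrp x16, callee
+ //                      add  x16, x16, :lo12:callee
+ //                      br   x16
+ // so an out-of-range "bl callee" can be retargeted at it.)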
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // We don't usually want to end the call-sequence here because we would tidy
+ // the frame up *after* the call. However, in the ABI-changing tail-call case
+ // we've carefully laid out the parameters so that when sp is reset they'll be
+ // in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // We produce the following DAG scheme for the actual call instruction:
+ // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
+ //
+ // Most arguments aren't going to be used and just keep the values live as
+ // far as LLVM is concerned. It's expected to be selected as simply "bl
+ // callee" (for a direct, non-tail call).
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ }
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+
+ // Add a register mask operand representing the call-preserved registers. This
+ // is used later in codegen to constrain register-allocation.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // If we needed glue, put it in as the last argument.
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ if (IsTailCall) {
+ return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ }
+
+ Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Now we can reclaim the stack; we may as well do it before working out
+ // where our return value is.
+ if (!IsSibCall) {
+ uint64_t CalleePopBytes
+ = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return LowerCallResult(Chain, InFlag, CallConv,
+ IsVarArg, Ins, dl, DAG, InVals);
+}
+
+SDValue
+AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ // Return values that are too big to fit into registers should use an sret
+ // pointer, so this can be a lot simpler than the main argument code.
+ assert(VA.isRegLoc() && "Memory locations not expected for call return");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ case CCValAssign::SExt:
+ case CCValAssign::AExt:
+ // Floating-point arguments only get extended/truncated if they're going
+ // in memory, so using the integer operation is acceptable here.
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+bool
+AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+
+ // For CallingConv::C this function knows whether the ABI needs
+ // changing. That's not true for other conventions so they will have to opt in
+ // manually.
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Byval parameters hand the function a pointer directly into the stack area
+ // we want to reuse during a tail call. Working around this *is* possible (see
+ // X86) but less efficient and uglier in LowerCall.
+ for (Function::const_arg_iterator i = CallerF->arg_begin(),
+ e = CallerF->arg_end(); i != e; ++i)
+ if (i->hasByValAttr())
+ return false;
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (IsTailCallConvention(CalleeCC) && CCMatch)
+ return true;
+ return false;
+ }
+
+ // Now we search for cases where we can use a tail call without changing the
+ // ABI. Sibcall is used in some places (particularly gcc) to refer to this
+ // concept.
+
+ // I want anyone implementing a new calling convention to think long and hard
+ // about this assert.
+ assert((!IsVarArg || CalleeCC == CallingConv::C)
+ && "Unexpected variadic calling convention");
+
+ if (IsVarArg && !Outs.empty()) {
+ // At least two cases here: if caller is fastcc then we can't have any
+ // memory arguments (we'd be expected to clean up the stack afterwards). If
+ // caller is C then we could potentially use its argument area.
+
+ // FIXME: for now we take the most conservative of these in both cases:
+ // disallow all variadic memory operands.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
+ if (!ArgLocs[i].isRegLoc())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // Nothing more to check if the callee is taking no arguments
+ if (Outs.empty())
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ const AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+
+ // If the stack arguments for this call would fit into our own save area then
+ // the call can be made tail.
+ return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
+}
+
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
+}
+
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
+}
+
+SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
+ SelectionDAG &DAG,
+ MachineFrameInfo *MFI,
+ int ClobberedFI) const {
+ SmallVector<SDValue, 8> ArgChains;
+ int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each load of an incoming stack argument that
+ // overlaps the bytes we are about to clobber.
+ for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
+ UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0) {
+ int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InLastByte = InFirstByte;
+ InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+
+ if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
+ (FirstByte <= InFirstByte && InFirstByte <= LastByte))
+ ArgChains.push_back(SDValue(L, 1));
+ }
+
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+ &ArgChains[0], ArgChains.size());
+}
+
+static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETEQ: return A64CC::EQ;
+ case ISD::SETGT: return A64CC::GT;
+ case ISD::SETGE: return A64CC::GE;
+ case ISD::SETLT: return A64CC::LT;
+ case ISD::SETLE: return A64CC::LE;
+ case ISD::SETNE: return A64CC::NE;
+ case ISD::SETUGT: return A64CC::HI;
+ case ISD::SETUGE: return A64CC::HS;
+ case ISD::SETULT: return A64CC::LO;
+ case ISD::SETULE: return A64CC::LS;
+ default: llvm_unreachable("Unexpected condition code");
+ }
+}
+
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
+ // icmp is implemented using adds/subs immediate, which take an unsigned
+ // 12-bit immediate, optionally shifted left by 12 bits.
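+ // So, for example, 0xabc and 0xabc000 are both directly usable, while
+ // something like 0xabcd (which needs bits from both halves) is not.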
+
+ // Symmetric by using adds/subs
+ if (Val < 0)
+ Val = -Val;
+
+ return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
+}
+
+SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue &A64cc,
+ SelectionDAG &DAG, DebugLoc &dl) const {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ int64_t C = 0;
+ EVT VT = RHSC->getValueType(0);
+ bool knownInvalid = false;
+
+ // I'm not convinced the rest of LLVM handles these edge cases properly, but
+ // we can at least get it right.
+ if (isSignedIntSetCC(CC)) {
+ C = RHSC->getSExtValue();
+ } else if (RHSC->getZExtValue() > INT64_MAX) {
+ // A 64-bit constant not representable by a signed 64-bit integer is far
+ // too big to fit into a SUBS immediate anyway.
+ knownInvalid = true;
+ } else {
+ C = RHSC->getZExtValue();
+ }
+
+ if (!knownInvalid && !isLegalICmpImmediate(C)) {
+ // Constant does not fit, try adjusting it by one?
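+      // For example, (x < 0x1001) cannot encode 0x1001 as an immediate, but
+      // the equivalent (x <= 0x1000) can be encoded (illustrative values).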
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (isLegalICmpImmediate(C-1)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (isLegalICmpImmediate(C+1)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, VT);
+ }
+ break;
+ }
+ }
+ }
+
+ A64CC::CondCodes CondCode = IntCCToA64CC(CC);
+ A64cc = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+}
+
+static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
+ A64CC::CondCodes &Alternative) {
+ A64CC::CondCodes CondCode = A64CC::Invalid;
+ Alternative = A64CC::Invalid;
+
+ switch (CC) {
+ default: llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = A64CC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = A64CC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = A64CC::GE; break;
+ case ISD::SETOLT: CondCode = A64CC::MI; break;
+ case ISD::SETOLE: CondCode = A64CC::LS; break;
+ case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
+ case ISD::SETO: CondCode = A64CC::VC; break;
+ case ISD::SETUO: CondCode = A64CC::VS; break;
+ case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
+ case ISD::SETUGT: CondCode = A64CC::HI; break;
+ case ISD::SETUGE: CondCode = A64CC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = A64CC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = A64CC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = A64CC::NE; break;
+ }
+ return CondCode;
+}
+
+SDValue
+AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "Only small code model supported at the moment");
+
+ // The most efficient code is PC-relative anyway for the small memory model,
+ // so we don't need to worry about relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+}
+
+
+// (BRCOND chain, val, dest)
+SDValue
+AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ SDValue TheBit = Op.getOperand(1);
+ SDValue DestBB = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
+ A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
+ DestBB);
+}
+
+// (BR_CC chain, condcode, lhs, rhs, dest)
+SDValue
+AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue DestBB = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to runtime calls by a routine which sets
+ // LHS, RHS and CC appropriately for the rest of this function to continue.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, CmpOp, A64cc, DestBB);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
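+  // For example, SETONE ("ordered and not equal") becomes MI ("less than")
+  // with GT ("greater than") as the alternative test.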
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ Chain, SetCC, A64cc, DestBB);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
+ A64BR_CC, SetCC, A64cc, DestBB);
+
+ }
+
+ return A64BR_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ ArgListTy Args;
+ ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference caller stack
+ // frame. Check if it's in the right position.
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
+ 0, getLibcallCallingConv(Call), isTailCall,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, Op.getDebugLoc());
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make that distinction here.
+
+ // We support the small memory model for now.
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+
+ if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
+ // Weak symbols can't use ADRP/ADD pair since they should evaluate to
+ // zero when undefined. In PIC mode the GOT can take care of this, but in
+ // absolute mode we use a constant pool load.
+ SDValue PoolAddr;
+ PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(),
+ /*isVolatile=*/ false, /*isNonTemporal=*/ true,
+ /*isInvariant=*/ true, 8);
+ }
+
+ if (Alignment == 0) {
+ const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
+ if (GVPtrTy->getElementType()->isSized()) {
+ Alignment
+ = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
+ } else {
+ // Be conservative if we can't guess, not that it really matters:
+ // functions and labels aren't valid for loads, and the methods used to
+ // actually calculate an address work with any alignment.
+ Alignment = 1;
+ }
+ }
+
+ unsigned char HiFixup, LoFixup;
+ bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+
+ if (UseGOT) {
+ HiFixup = AArch64II::MO_GOT;
+ LoFixup = AArch64II::MO_GOT_LO12;
+ Alignment = 8;
+ } else {
+ HiFixup = AArch64II::MO_NO_FLAG;
+ LoFixup = AArch64II::MO_LO12;
+ }
+
+ // AArch64's small model demands the following sequence:
+ // ADRP x0, somewhere
+ // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
+ SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ HiFixup),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ LoFixup),
+ DAG.getConstant(Alignment, MVT::i32));
+
+ if (UseGOT) {
+ GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
+ GlobalRef);
+ }
+
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalRef;
+}
+
+SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr,
+ DebugLoc DL,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+
+ // The function we need to call is simply the first entry in the GOT for this
+ // descriptor, load it in preparation.
+ SDValue Func, Chain;
+ Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ DescAddr);
+
+ // The function takes only one argument: the address of the descriptor itself
+ // in X0.
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
+ Glue = Chain.getValue(1);
+
+ // Finally, there's a special calling-convention which means that the lookup
+ // must preserve all registers (except X0, obviously).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *A64RI
+ = static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
+
+ // We're now ready to populate the argument list, as with a normal call:
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Func);
+ Ops.push_back(SymAddr);
+ Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
+ Ops.size());
+ Glue = Chain.getValue(1);
+
+ // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
+ // back to the generic handling code.
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ SDValue TPOff;
+ EVT PtrVT = getPointerTy();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+
+ SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+
+ if (Model == TLSModel::InitialExec) {
+ TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_GOTTPREL_LO12),
+ DAG.getConstant(8, MVT::i32));
+ TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
+ TPOff);
+ } else if (Model == TLSModel::LocalExec) {
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_TPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
+
+ TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ } else if (Model == TLSModel::LocalDynamic) {
+ // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
+ // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
+ // the beginning of the module's TLS region, followed by a DTPREL offset
+ // calculation.
+
+ // These accesses will need deduplicating if there's more than one.
+ AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ .getInfo<AArch64MachineFunctionInfo>();
+ MFI->incNumLocalDynamicTLSAccesses();
+
+
+ // Get the location of _TLS_MODULE_BASE_:
+ SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC);
+ SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLSDESC_LO12);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ HiDesc, LoDesc,
+ DAG.getConstant(8, MVT::i32));
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
+
+ ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Get the variable's offset from _TLS_MODULE_BASE_
+ SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
+ AArch64II::MO_DTPREL_G0_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
+ TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)), 0);
+ } else
+ llvm_unreachable("Unsupported TLS access model");
+
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
+}
+
+SDValue
+AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned) const {
+ if (Op.getValueType() != MVT::f128) {
+ // Legal for everything except f128.
+ return Op;
+ }
+
+ RTLIB::Libcall LC;
+ if (IsSigned)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128ToCall(Op, DAG, LC);
+}
+
+
+SDValue
+AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ DebugLoc dl = JT->getDebugLoc();
+
+ // When compiling PIC, jump tables get put in the code section so a static
+ // relocation-style is acceptable for both cases.
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
+ DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+}
+
+// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
+SDValue
+AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue IfTrue = Op.getOperand(2);
+ SDValue IfFalse = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons are lowered to libcalls, but slot in nicely here
+ // afterwards.
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (RHS.getNode() == 0) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ CmpOp, IfTrue, IfFalse, A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
+ Op.getValueType(),
+ SetCC, IfTrue, IfFalse, A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ SetCC, IfTrue, A64SELECT_CC, A64cc);
+
+ }
+
+ return A64SELECT_CC;
+}
+
+// (SELECT testbit, iftrue, iffalse)
+SDValue
+AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue TheBit = Op.getOperand(0);
+ SDValue IfTrue = Op.getOperand(1);
+ SDValue IfFalse = Op.getOperand(2);
+
+ // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
+ // that as the consumer we are responsible for ignoring rubbish in higher
+ // bits.
+ TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
+ DAG.getConstant(1, MVT::i32));
+ SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
+ DAG.getConstant(0, TheBit.getValueType()),
+ DAG.getCondCode(ISD::SETNE));
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
+ A64CMP, IfTrue, IfFalse,
+ DAG.getConstant(A64CC::NE, MVT::i32));
+}
+
+// (SETCC lhs, rhs, condcode)
+SDValue
+AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ EVT VT = Op.getValueType();
+
+ if (LHS.getValueType() == MVT::f128) {
+ // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
+ // for the rest of the function (some i32 or i64 values).
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (RHS.getNode() == 0) {
+ assert(LHS.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return LHS;
+ }
+ }
+
+ if (LHS.getValueType().isInteger()) {
+ SDValue A64cc;
+
+ // Integers are handled in a separate function because the combinations of
+ // immediates and tests can get hairy and we may want to fiddle things.
+ SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+
+ return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ A64cc);
+ }
+
+ // Note that some LLVM floating-point CondCodes can't be lowered to a single
+ // conditional branch, hence FPCCToA64CC can set a second test, where either
+ // passing is sufficient.
+ A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
+ CondCode = FPCCToA64CC(CC, Alternative);
+ SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
+ DAG.getCondCode(CC));
+ SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
+ CmpOp, DAG.getConstant(1, VT),
+ DAG.getConstant(0, VT), A64cc);
+
+ if (Alternative != A64CC::Invalid) {
+ A64cc = DAG.getConstant(Alternative, MVT::i32);
+ A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
+ DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ }
+
+ return A64SELECT_CC;
+}
+
+SDValue
+AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
+ // rather than just 8.
+ return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(32, MVT::i32), 8, false, false,
+ MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue
+AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // The layout of the va_list struct is specified in the AArch64 Procedure Call
+ // Standard, section B.3.
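+  // In C terms the structure is:
+  //   struct va_list {
+  //     void *__stack;   // offset 0
+  //     void *__gr_top;  // offset 8
+  //     void *__vr_top;  // offset 16
+  //     int   __gr_offs; // offset 24
+  //     int   __vr_offs; // offset 28
+  //   };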
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64MachineFunctionInfo *FuncInfo
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAList = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SmallVector<SDValue, 4> MemOps;
+
+ // void *__stack at offset 0
+ SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
+ getPointerTy());
+ MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
+ MachinePointerInfo(SV), false, false, 0));
+
+ // void *__gr_top at offset 8
+ int GPRSize = FuncInfo->getVariadicGPRSize();
+ if (GPRSize > 0) {
+ SDValue GRTop, GRTopAddr;
+
+ GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(8, getPointerTy()));
+
+ GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
+ DAG.getConstant(GPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
+ MachinePointerInfo(SV, 8),
+ false, false, 0));
+ }
+
+ // void *__vr_top at offset 16
+ int FPRSize = FuncInfo->getVariadicFPRSize();
+ if (FPRSize > 0) {
+ SDValue VRTop, VRTopAddr;
+ VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(16, getPointerTy()));
+
+ VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
+ DAG.getConstant(FPRSize, getPointerTy()));
+
+ MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
+ MachinePointerInfo(SV, 16),
+ false, false, 0));
+ }
+
+ // int __gr_offs at offset 24
+ SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(24, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ GROffsAddr, MachinePointerInfo(SV, 24),
+ false, false, 0));
+
+ // int __vr_offs at offset 28
+ SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(28, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ VROffsAddr, MachinePointerInfo(SV, 28),
+ false, false, 0));
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
+ MemOps.size());
+}
+
+SDValue
+AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+  // We're looking for an SRL/AND pair which forms a UBFX.
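+  // For example, on i32 (and (srl X, #4), 0xff) extracts an 8-bit unsigned
+  // field starting at bit 4, becoming UBFX with pre-encoded operands #4 and
+  // #11 (illustrative values).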
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (!isMask_64(TruncMask))
+ return SDValue();
+
+ uint64_t Width = CountPopulation_64(TruncMask);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+ uint64_t LSB = Shift->getConstantOperandVal(1);
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // An atomic operation followed by an acquiring atomic fence can be reduced to
+ // an acquiring load. The atomic operation provides a convenient pointer to
+ // load from. If the original operation was a load anyway we can actually
+ // combine the two operations into an acquiring load.
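+  // For example, a monotonic atomic load followed by "fence acquire" can be
+  // rewritten as a single load-acquire, and the fence disappears.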
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue AtomicOp = FenceNode->getOperand(0);
+ AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
+
+ // A fence on its own can't be optimised
+ if (!AtomicNode)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
+
+ if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
+ // the chain we use should be its input, otherwise we'll put our store after
+ // it so we use its output chain.
+ SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
+ AtomicNode->getChain() : AtomicOp;
+
+ // We have an acquire fence with a handy atomic operation nearby, we can
+ // convert the fence into a load-acquire, discarding the result.
+ DebugLoc DL = FenceNode->getDebugLoc();
+ SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
+ AtomicNode->getValueType(0),
+ Chain, // Chain
+ AtomicOp.getOperand(1), // Pointer
+ AtomicNode->getMemOperand(), Acquire,
+ FenceScope);
+
+ if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
+ DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
+
+ return Op.getValue(1);
+}
+
+static SDValue PerformATOMIC_STORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // A releasing atomic fence followed by an atomic store can be combined into a
+ // single store operation.
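+  // For example, "fence release" followed by a monotonic atomic store can be
+  // rewritten as a single store-release, and the fence disappears.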
+ SelectionDAG &DAG = DCI.DAG;
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
+ SDValue FenceOp = AtomicNode->getOperand(0);
+
+ if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
+ return SDValue();
+
+ AtomicOrdering FenceOrder
+ = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
+ SynchronizationScope FenceScope
+ = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
+
+ if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
+ return SDValue();
+
+ DebugLoc DL = AtomicNode->getDebugLoc();
+ return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
+ FenceOp.getOperand(0), // Chain
+ AtomicNode->getOperand(1), // Pointer
+ AtomicNode->getOperand(2), // Value
+ AtomicNode->getMemOperand(), Release,
+ FenceScope);
+}
+
+/// For a true bitfield insert, the bits getting into that contiguous mask
+/// should come from the low part of an existing value: they must be formed from
+/// a compatible SHL operation (unless they're already low). This function
+/// checks that condition and returns the least-significant bit that's
+/// intended. If the operation is not a field preparation, -1 is returned.
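+/// For example, with Mask == 0xff0 and MaskedVal == (shl X, #4), the field's
+/// LSB is 4 and X can feed the BFI directly, with no extra shift needed
+/// (illustrative values).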
+static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ SDValue &MaskedVal, uint64_t Mask) {
+ if (!isShiftedMask_64(Mask))
+ return -1;
+
+ // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
+ // instruction. BFI will do a left-shift by LSB before applying the mask we've
+ // spotted, so in general we should pre-emptively "undo" that by making sure
+ // the incoming bits have had a right-shift applied to them.
+ //
+ // This right shift, however, will combine with existing left/right shifts. In
+ // the simplest case of a completely straight bitfield operation, it will be
+ // expected to completely cancel out with an existing SHL. More complicated
+ // cases (e.g. bitfield to bitfield copy) may still need a real shift before
+ // the BFI.
+
+ uint64_t LSB = CountTrailingZeros_64(Mask);
+ int64_t ShiftRightRequired = LSB;
+ if (MaskedVal.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ } else if (MaskedVal.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
+ ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
+ MaskedVal = MaskedVal.getOperand(0);
+ }
+
+ if (ShiftRightRequired > 0)
+ MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
+ DAG.getConstant(ShiftRightRequired, MVT::i64));
+ else if (ShiftRightRequired < 0) {
+ // We could actually end up with a residual left shift, for example with
+ // "struc.bitfield = val << 1".
+ MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
+ DAG.getConstant(-ShiftRightRequired, MVT::i64));
+ }
+
+ return LSB;
+}
+
+/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
+/// a mask and an extension. Returns true if a BFI was found and provides
+/// information on its surroundings.
+static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
+ bool &Extended) {
+ Extended = false;
+ if (N.getOpcode() == ISD::ZERO_EXTEND) {
+ Extended = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ Mask = N->getConstantOperandVal(1);
+ N = N.getOperand(0);
+ } else {
+ // Mask is the whole width.
+ Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
+ }
+
+ if (N.getOpcode() == AArch64ISD::BFI) {
+ BFI = N;
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
+/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
+/// can often be further combined with a larger mask. Ultimately, we want mask
+/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
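+/// For example, on i32 (or (and X, 0xffff00ff), (and (shl Y, #8), 0xff00))
+/// can become (BFI X, Y, #8, #8); here the two masks cover the whole register,
+/// so no trailing AND is needed (illustrative values).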
+static SDValue tryCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
+ // abandon the effort.
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t LHSMask;
+ if (isa<ConstantSDNode>(LHS.getOperand(1)))
+ LHSMask = LHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
+ // is or abandon the effort.
+ SDValue RHS = N->getOperand(1);
+ if (RHS.getOpcode() != ISD::AND)
+ return SDValue();
+
+ uint64_t RHSMask;
+ if (isa<ConstantSDNode>(RHS.getOperand(1)))
+ RHSMask = RHS->getConstantOperandVal(1);
+ else
+ return SDValue();
+
+ // Can't do anything if the masks are incompatible.
+ if (LHSMask & RHSMask)
+ return SDValue();
+
+ // Now we need one of the masks to be a contiguous field. Without loss of
+ // generality that should be the RHS one.
+ SDValue Bitfield = LHS.getOperand(0);
+ if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
+ // We know that LHS is a candidate new value, and RHS isn't already a better
+ // one.
+ std::swap(LHS, RHS);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ // We've done our best to put the right operands in the right places, all we
+ // can do now is check whether a BFI exists.
+ Bitfield = RHS.getOperand(0);
+ int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
+ if (LSB == -1)
+ return SDValue();
+
+ uint32_t Width = CountPopulation_64(RHSMask);
+ assert(Width && "Expected non-zero bitfield width");
+
+ SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ LHS.getOperand(0), Bitfield,
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(Width, MVT::i64));
+
+ // Mask is trivial
+ if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+}
+
+/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
+/// original input. This is surprisingly common because SROA splits things up
+/// into i8 chunks, so the originally detected MaskedBFI may actually only act
+/// on the low (say) byte of a word. This is then ORed into the rest of the
+/// word afterwards.
+///
+/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
+///
+/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
+/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
+/// involved.
+static SDValue tryCombineToLargerBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // First job is to hunt for a MaskedBFI on either the left or right. Swap
+ // operands if it's actually on the right.
+ SDValue BFI;
+ SDValue PossExtraMask;
+ uint64_t ExistingMask = 0;
+ bool Extended = false;
+ if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(1);
+ else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
+ PossExtraMask = N->getOperand(0);
+ else
+ return SDValue();
+
+ // We can only combine a BFI with another compatible mask.
+ if (PossExtraMask.getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+
+ // Masks must be compatible.
+ if (ExtraMask & ExistingMask)
+ return SDValue();
+
+ SDValue OldBFIVal = BFI.getOperand(0);
+ SDValue NewBFIVal = BFI.getOperand(1);
+ if (Extended) {
+ // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
+ // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
+ // need to be made compatible.
+ assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
+ && "Invalid types for BFI");
+ OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
+ NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ }
+
+ // We need the MaskedBFI to be combined with a mask of the *same* value.
+ if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return SDValue();
+
+ BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
+ OldBFIVal, NewBFIVal,
+ BFI.getOperand(2), BFI.getOperand(3));
+
+ // If the masking is trivial, we don't need to create it.
+ if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
+ return BFI;
+
+ return DAG.getNode(ISD::AND, DL, VT, BFI,
+ DAG.getConstant(ExtraMask | ExistingMask, VT));
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+ bool &FromHi) {
+ if (N.getOpcode() == ISD::SHL)
+ FromHi = false;
+ else if (N.getOpcode() == ISD::SRL)
+ FromHi = true;
+ else
+ return false;
+
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return false;
+
+ ShiftAmount = N->getConstantOperandVal(1);
+ Src = N->getOperand(0);
+ return true;
+}
+
+/// EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
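+/// For example, on i64 (or (shl X, #16), (srl Y, #48)) becomes
+/// (EXTR X, Y, #48) (illustrative values).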
+static SDValue tryCombineToEXTR(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS;
+ uint32_t ShiftLHS = 0;
+  bool LHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+ return SDValue();
+
+ SDValue RHS;
+ uint32_t ShiftRHS = 0;
+  bool RHSFromHi = false;
+ if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+ return SDValue();
+
+ // If they're both trying to come from the high part of the register, they're
+ // not really an EXTR.
+ if (LHSFromHi == RHSFromHi)
+ return SDValue();
+
+ if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
+ return SDValue();
+
+ if (LHSFromHi) {
+ std::swap(LHS, RHS);
+ std::swap(ShiftLHS, ShiftRHS);
+ }
+
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT,
+ LHS, RHS,
+ DAG.getConstant(ShiftRHS, MVT::i64));
+}
+
+/// Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Attempt to recognise bitfield-insert operations.
+ SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ // Attempt to combine an existing MaskedBFI operation into one with a larger
+ // mask.
+ Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ if (Res.getNode())
+ return Res;
+
+ Res = tryCombineToEXTR(N, DCI);
+ if (Res.getNode())
+ return Res;
+
+ return SDValue();
+}
+
+/// Target-specific dag combine xforms for ISD::SRA
+static SDValue PerformSRACombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // We're looking for an SRA/SHL pair which form an SBFX.
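+  // For example, on i32 (sra (shl X, #24), #28) extracts a 4-bit signed field
+  // starting at bit 4, becoming SBFX with pre-encoded operands #4 and #7
+  // (illustrative values).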
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ExtraSignBits = N->getConstantOperandVal(1);
+ SDValue Shift = N->getOperand(0);
+
+ if (Shift.getOpcode() != ISD::SHL)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ return SDValue();
+
+ uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
+ uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
+ uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+
+ if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ return SDValue();
+
+ return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
+ DAG.getConstant(LSB, MVT::i64),
+ DAG.getConstant(LSB + Width - 1, MVT::i64));
+}
+
+
+SDValue
+AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
+ case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::SRA: return PerformSRACombine(N, DCI);
+ }
+ return SDValue();
+}
+
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'w': // An FP/SIMD vector register
+ return C_RegisterClass;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return C_Other;
+ case 'Q': // A memory reference with base register and no offset
+ return C_Memory;
+ case 'S': // A symbolic address
+ return C_Other;
+ }
+ }
+
+ // FIXME: Ump, Utf, Usa, Ush
+ // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
+ // whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
+ && Constraint != "Ush" && "Unimplemented constraints");
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const {
+
+ llvm_unreachable("Constraint weight unimplemented");
+}
+
+void
+AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ // Only length 1 constraints are C_Other.
+ if (Constraint.size() != 1) return;
+
+  // Only C_Other constraints get lowered like this. For us that means
+  // constants, so return early if there's no hope the constraint can be
+  // lowered.
+
+ switch(Constraint[0]) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'Z': {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ uint64_t CVal = C->getZExtValue();
+ uint32_t Bits;
+
+ switch (Constraint[0]) {
+ default:
+ // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
+ // is a peculiarly useless SUB constraint.
+ llvm_unreachable("Unimplemented C_Other constraint");
+ case 'I':
+ if (CVal <= 0xfff)
+ break;
+ return;
+ case 'K':
+ if (A64Imms::isLogicalImm(32, CVal, Bits))
+ break;
+ return;
+ case 'L':
+ if (A64Imms::isLogicalImm(64, CVal, Bits))
+ break;
+ return;
+ case 'Z':
+ if (CVal == 0)
+ break;
+ return;
+ }
+
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+ case 'S': {
+ // An absolute symbolic address or label reference.
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+ GA->getValueType(0));
+ } else if (const BlockAddressSDNode *BA
+ = dyn_cast<BlockAddressSDNode>(Op)) {
+ Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
+ BA->getValueType(0));
+ } else if (const ExternalSymbolSDNode *ES
+ = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0));
+ } else
+ return;
+ break;
+ }
+ case 'Y':
+ if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFP->isExactlyValue(0.0)) {
+ Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
+ break;
+ }
+ }
+ return;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // It's an unknown constraint for us. Let generic code have a go.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+AArch64TargetLowering::getRegForInlineAsmConstraint(
+ const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &AArch64::GPR32RegClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, &AArch64::GPR64RegClass);
+ break;
+ case 'w':
+ if (VT == MVT::f16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ else if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ else if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::VPR64RegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
+ else if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::VPR128RegClass);
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
new file mode 100644
index 0000000..4960d28
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -0,0 +1,247 @@
+//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AArch64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
+#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+
+namespace llvm {
+namespace AArch64ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // This is a conditional branch which also notes the flag needed
+ // (eq/sgt/...). A64 puts this information on the branches rather than
+ // compares as LLVM does.
+ BR_CC,
+
+ // A node to be selected to an actual call operation: either BL or BLR in
+ // the absence of tail calls.
+ Call,
+
+ // Indicates a floating-point immediate which fits into the format required
+ // by the FMOV instructions. First (and only) operand is the 8-bit encoded
+ // value of that immediate.
+ FPMOV,
+
+  // Corresponds directly to an EXTR instruction. Operands are an LHS, an RHS
+ // and an LSB.
+ EXTR,
+
+ // Wraps a load from the GOT, which should always be performed with a 64-bit
+ // load instruction. This prevents the DAG combiner folding a truncate to
+ // form a smaller memory access.
+ GOTLoad,
+
+ // Performs a bitfield insert. Arguments are: the value being inserted into;
+ // the value being inserted; least significant bit changed; width of the
+ // field.
+ BFI,
+
+ // Simply a convenient node inserted during ISelLowering to represent
+ // procedure return. Will almost certainly be selected to "RET".
+ Ret,
+
+ /// Extracts a field of contiguous bits from the source and sign extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ SBFX,
+
+ /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
+ /// main difference is that it only has the values and an A64 condition,
+ /// which will be produced by a setcc instruction.
+ SELECT_CC,
+
+ /// This serves most of the functions of the LLVM SETCC instruction, for two
+ /// purposes. First, it prevents optimisations from fiddling with the
+ /// compare after we've moved the CondCode information onto the SELECT_CC or
+ /// BR_CC instructions. Second, it gives a legal instruction for the actual
+ /// comparison.
+ ///
+ /// It keeps a record of the condition flags asked for because certain
+ /// instructions are only valid for a subset of condition codes.
+ SETCC,
+
+ // Designates a node which is a tail call: both a call and a return
+  // instruction as far as selection is concerned. It should be selected to an
+ // unconditional branch. Has the usual plethora of call operands, but: 1st
+ // is callee, 2nd is stack adjustment required immediately before branch.
+ TC_RETURN,
+
+ // Designates a call used to support the TLS descriptor ABI. The call itself
+ // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
+ // var") must be attached somehow during code generation. It takes two
+ // operands: the callee and the symbol to be relocated against.
+ TLSDESCCALL,
+
+ // Leaf node which will be lowered to an appropriate MRS to obtain the
+ // thread pointer: TPIDR_EL0.
+ THREAD_POINTER,
+
+ /// Extracts a field of contiguous bits from the source and zero extends
+ /// them into a single register. Arguments are: source; immr; imms. Note
+ /// these are pre-encoded since DAG matching can't cope with combining LSB
+ /// and Width into these values itself.
+ UBFX,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small absolute memory model: i.e. adrp/add or
+ // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
+ // get selected.
+ WrapperSmall
+ };
+}
+
+
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64TargetLowering : public TargetLowering {
+public:
+ explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc DL, SDValue &Chain) const;
+
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
+
+ EVT getSetCCResultType(EVT VT) const;
+
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ bool isLegalICmpImmediate(int64_t Val) const;
+ SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *
+ emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
+ unsigned Size, unsigned Opcode) const;
+
+ MachineBasicBlock *
+ emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned CmpOp,
+ A64CC::CondCodes Cond) const;
+ MachineBasicBlock *
+ emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size) const;
+
+ MachineBasicBlock *
+ EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+ /// is expanded to mul + add.
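+  /// For example, (fmuladd a, b, c) will normally be selected as a single
+  /// FMADD rather than an FMUL followed by an FADD.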
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
+ const char *Constraint) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+private:
+ const AArch64Subtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
new file mode 100644
index 0000000..cb93471
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -0,0 +1,961 @@
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file describes AArch64 instruction formats, down to the level of the
+// instruction's overall class.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// A64 Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+// A64 is currently the only instruction set supported by the AArch64
+// architecture.
+class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : Instruction {
+ // All A64 instructions are 32-bit. This field will be filled in
+ // gradually going down the hierarchy.
+ field bits<32> Inst;
+
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
+
+ // LLVM-level model of the AArch64/A64 distinction.
+ let Namespace = "AArch64";
+ let DecoderNamespace = "A64";
+ let Size = 4;
+
+ // Set the templated fields
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = patterns;
+ let Itinerary = itin;
+}
+
+class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
+ let Namespace = "AArch64";
+
+ let OutOperandList = outs;
+  let InOperandList = ins;
+ let Pattern = patterns;
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+// Represents a pseudo-instruction that, for whatever reason, stands for a
+// single A64 instruction; the eventual result will be a 32-bit real
+// instruction.
+class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
+ : PseudoInst<outs, ins, patterns> {
+ let Size = 4;
+}
+
+// As above, this will be a single A64 instruction, but we can actually give the
+// expansion in TableGen.
+class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
+ : A64PseudoInst<outs, ins, patterns>,
+ PseudoInstExpansion<Result>;
+
+
+// First, some common cross-hierarchy register formats.
+
+class A64InstRd<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rd;
+
+ let Inst{4-0} = Rd;
+}
+
+class A64InstRt<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt;
+
+ let Inst{4-0} = Rt;
+}
+
+
+class A64InstRdn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+  // Inherit Rd in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+class A64InstRtn<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+  // Inherit Rt in 4-0
+ bits<5> Rn;
+
+ let Inst{9-5} = Rn;
+}
+
+// Instructions taking Rt,Rt2,Rn
+class A64InstRtt2n<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rt2;
+
+ let Inst{14-10} = Rt2;
+}
+
+class A64InstRdnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Actual A64 Instruction Formats
+//
+//===----------------------------------------------------------------------===//
+
+// Format for Add-subtract (extended register) instructions.
+class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<3> Imm3;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = opt;
+ let Inst{21} = 0b1;
+ // Rm inherited in 20-16
+ let Inst{15-13} = option;
+ let Inst{12-10} = Imm3;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (immediate) instructions.
+class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<12> Imm12;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = shift;
+ let Inst{21-10} = Imm12;
+}
+
+// Format for Add-subtract (shifted register) instructions.
+class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift;
+ let Inst{21} = 0b0;
+ // Rm inherited in 20-16
+ let Inst{15-10} = Imm6;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Add-subtract (with carry) instructions.
+class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010000;
+ // Rm inherited in 20-16
+ let Inst{15-10} = opcode2;
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+
+// Format for Bitfield instructions
+class A64I_bitfield<bit sf, bits<2> opc, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{22} = n;
+ let Inst{21-16} = ImmR;
+ let Inst{15-10} = ImmS;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for compare and branch (immediate) instructions.
+class A64I_cmpbr<bit sf, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+
+ let Inst{31} = sf;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = Label;
+ // Inherit Rt in 4-0
+}
+
+// Format for conditional branch (immediate) instructions.
+class A64I_condbr<bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<19> Label;
+ bits<4> Cond;
+
+ let Inst{31-25} = 0b0101010;
+ let Inst{24} = o1;
+ let Inst{23-5} = Label;
+ let Inst{4} = o0;
+ let Inst{3-0} = Cond;
+}
+
+// Format for conditional compare (immediate) instructions.
+class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> UImm5;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = UImm5;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b1;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional compare (register) instructions.
+class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11} = 0b0;
+ let Inst{10} = o2;
+ let Inst{9-5} = Rn;
+ let Inst{4} = o3;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for conditional select instructions.
+class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = sf;
+ let Inst{30} = op;
+ let Inst{29} = s;
+ let Inst{28-21} = 0b11010100;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = op2;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for data processing (1 source) instructions
+class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b1;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{20-16} = opcode2;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data processing (2 source) instructions
+class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
+ string asmstr, dag outs, dag ins,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = S;
+ let Inst{28-21} = 0b11010110;
+ let Inst{15-10} = opcode;
+}
+
+// Format for data-processing (3 source) instructions
+class A64I_dp3<bit sf, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opcode{5-4};
+ let Inst{28-24} = 0b11011;
+ let Inst{23-21} = opcode{3-1};
+ // Inherits Rm in 20-16
+ let Inst{15} = opcode{0};
+ let Inst{14-10} = Ra;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for exception generation instructions
+class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = opc;
+ let Inst{20-5} = UImm16;
+ let Inst{4-2} = op2;
+ let Inst{1-0} = ll;
+}
+
+// Format for extract (immediate) instructions
+class A64I_extract<bit sf, bits<3> op, bit n,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> LSB;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = op{2-1};
+ let Inst{28-23} = 0b100111;
+ let Inst{22} = n;
+ let Inst{21} = op{0};
+ // Inherits Rm in bits 20-16
+ let Inst{15-10} = LSB;
+ // Inherits Rn in 9-5
+ // Inherits Rd in 4-0
+}
+
+// Format for floating-point compare instructions.
+class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = op;
+ let Inst{13-10} = 0b1000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = opcode2;
+}
+
+// Format for floating-point conditional compare instructions.
+class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> NZCVImm;
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = op;
+ let Inst{3-0} = NZCVImm;
+}
+
+// Format for floating-point conditional select instructions.
+class A64I_fpcondsel<bit m, bit s, bits<2> type,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> Cond;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = Cond;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point data-processing (1 source) instructions.
+class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (2 sources) instructions.
+class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point data-processing (3 sources) instructions.
+class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<5> Ra;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11111;
+ let Inst{23-22} = type;
+ let Inst{21} = o1;
+ // Inherit Rm in 20-16
+ let Inst{15} = o0;
+ let Inst{14-10} = Ra;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> fixed-point conversion instructions.
+class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<6> Scale;
+
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b0;
+ let Inst{20-19} = mode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = Scale;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format for floating-point <-> integer conversion instructions.
+class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = sf;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+
+// Format for floating-point immediate instructions.
+class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<8> Imm8;
+
+ let Inst{31} = m;
+ let Inst{30} = 0b0;
+ let Inst{29} = s;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0b1;
+ let Inst{20-13} = Imm8;
+ let Inst{12-10} = 0b100;
+ let Inst{9-5} = imm5;
+ // Inherit Rd in 4-0
+}
+
+// Format for load-register (literal) instructions.
+class A64I_LDRlit<bits<2> opc, bit v,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+ bits<19> Imm19;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = Imm19;
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store exclusive instructions.
+class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = size;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+}
+
+class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rs;
+ let Inst{20-16} = Rs;
+}
+
+class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
+ dag outs, dag ins, string asmstr,
+ list <dag> patterns, InstrItinClass itin>:
+ A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
+ bits<5> Rt2;
+ let Inst{14-10} = Rt2;
+}
+
+// Format for load-store register (immediate post-indexed) instructions
+class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b01;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (immediate pre-indexed) instructions
+class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b11;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store register (unprivileged) instructions
+class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for load-store (unscaled immediate) instructions.
+class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<9> SImm9;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = SImm9;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+
+// Format for load-store (unsigned immediate) instructions.
+class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<12> UImm12;
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = UImm12;
+}
+
+// Format for load-store register (register offset) instructions.
+class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ // Complex operand selection needed for these instructions, so they
+ // need an "addr" field for encoding/decoding to be generated.
+ bits<3> Ext;
+ // OptionHi = Ext{2-1}
+ // S = Ext{0}
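+  // For example, "ldr x0, [x1, x2, lsl #3]" has the architectural option
+  // field 0b011 (LSL/UXTX) and S == 1 (shift == log2(8)), so Ext == 0b011
+  // here, with optionlo supplied by the instruction definition.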
+
+ let Inst{31-30} = size;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = v;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = Ext{2-1};
+ let Inst{13} = optionlo;
+ let Inst{12} = Ext{0};
+ let Inst{11-10} = 0b10;
+ // Inherits Rn in 9-5
+ // Inherits Rt in 4-0
+
+ let AddedComplexity = 50;
+}
+
+// Format for Load-store register pair (offset) instructions
+class A64I_LSPoffset<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (post-indexed) instructions
+class A64I_LSPpostind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store register pair (pre-indexed) instructions
+class A64I_LSPpreind<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b011;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Load-store non-temporal register pair (offset) instructions
+class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
+ bits<7> SImm7;
+
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = v;
+ let Inst{25-23} = 0b000;
+ let Inst{22} = l;
+ let Inst{21-15} = SImm7;
+ // Inherit Rt2 in 14-10
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format for Logical (immediate) instructions
+class A64I_logicalimm<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bit N;
+ bits<6> ImmR;
+ bits<6> ImmS;
+
+ // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
+ // selection), so we'll combine them into a single field here.
+ bits<13> Imm;
+ // N = Imm{12};
+ // ImmR = Imm{11-6};
+ // ImmS = Imm{5-0};
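+  // For example, the 64-bit logical immediate #0x1 is N = 1, ImmR = 0b000000,
+  // ImmS = 0b000000, i.e. Imm = 0b1000000000000.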
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = Imm{12};
+ let Inst{21-16} = Imm{11-6};
+ let Inst{15-10} = Imm{5-0};
+ // Rn inherited in 9-5
+ // Rd inherited in 4-0
+}
+
+// Format for Logical (shifted register) instructions
+class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ bits<6> Imm6;
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift;
+ let Inst{21} = N;
+ // Rm inherited
+ let Inst{15-10} = Imm6;
+ // Rn inherited
+ // Rd inherited
+}
+
+// Format for Move wide (immediate)
+class A64I_movw<bit sf, bits<2> opc,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<16> UImm16;
+ bits<2> Shift; // Called "hw" officially
+
+ let Inst{31} = sf;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = Shift;
+ let Inst{20-5} = UImm16;
+ // Inherits Rd in 4-0
+}
+
+// Format for PC-relative addressing instructions, ADR and ADRP.
+class A64I_PCADR<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRd<outs, ins, asmstr, patterns, itin> {
+ bits<21> Label;
+
+ let Inst{31} = op;
+ let Inst{30-29} = Label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = Label{20-2};
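+  // For example, an ADR with a byte offset of +7 has Label = 0b111, giving
+  // immlo (Inst{30-29}) = 0b11 and immhi (Inst{23-5}) = 0b1.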
+}
+
+// Format for system instructions
+class A64I_system<bit l,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ bits<2> Op0;
+ bits<3> Op1;
+ bits<4> CRn;
+ bits<4> CRm;
+ bits<3> Op2;
+ bits<5> Rt;
+
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = l;
+ let Inst{20-19} = Op0;
+ let Inst{18-16} = Op1;
+ let Inst{15-12} = CRn;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = Op2;
+ let Inst{4-0} = Rt;
+
+ // These instructions can do horrible things.
+ let hasSideEffects = 1;
+}
+
+// Format for unconditional branch (immediate) instructions
+class A64I_Bimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+  // Doubly special in not even sharing register fields with other
+  // instructions, so we create our own Label field here.
+ bits<26> Label;
+
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = Label;
+}
+
+// Format for Test & branch (immediate) instructions
+class A64I_TBimm<bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRt<outs, ins, asmstr, patterns, itin> {
+  // The bit position and label don't map onto fields shared with other
+  // instructions, so we create our own Imm and Label here.
+ bits<6> Imm;
+ bits<14> Label;
+
+ let Inst{31} = Imm{5};
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = Imm{4-0};
+ let Inst{18-5} = Label;
+ // Inherit Rt in 4-0
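+  // For example, "tbz x3, #35, dest" has Imm = 0b100011, so Inst{31} = 1 and
+  // Inst{23-19} = 0b00011; bit numbers of 32 or more imply an X register.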
+}
+
+// Format for Unconditional branch (register) instructions, including
+// RET. It shares no fields with instructions further up the hierarchy,
+// so it sits at the top level.
+class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64Inst<outs, ins, asmstr, patterns, itin> {
+ // Doubly special in not even sharing register fields with other
+ // instructions, so we create our own Rn here.
+ bits<5> Rn;
+
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = op2;
+ let Inst{15-10} = op3;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = op4;
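+  // For example, RET uses opc = 0b0010, op2 = 0b11111, op3 = 0b000000 and
+  // op4 = 0b00000, with Rn defaulting to the link register X30 in assembly.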
+}
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
new file mode 100644
index 0000000..7b93463
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -0,0 +1,822 @@
+//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <algorithm>
+
+#define GET_INSTRINFO_CTOR
+#include "AArch64GenInstrInfo.inc"
+
+using namespace llvm;
+
+AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(*this, STI), Subtarget(STI) {}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc = 0;
+ unsigned ZeroReg = 0;
+ if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
+ // E.g. ADD xDst, xsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // E.g. ADD wDST, wsp, #0 (, lsl #0)
+ BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return;
+ } else if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ // E.g. MSR NZCV, xDST
+ BuildMI(MBB, I, DL, get(AArch64::MSRix))
+ .addImm(A64SysReg::NZCV)
+ .addReg(SrcReg);
+ } else if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg));
+ // E.g. MRS xDST, NZCV
+ BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
+ .addImm(A64SysReg::NZCV);
+ } else if (AArch64::GPR64RegClass.contains(DestReg)) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else if (AArch64::GPR32RegClass.contains(DestReg)) {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else if (AArch64::FPR32RegClass.contains(DestReg)) {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR64RegClass.contains(DestReg)) {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else if (AArch64::FPR128RegClass.contains(DestReg)) {
+ assert(AArch64::FPR128RegClass.contains(SrcReg));
+
+ // FIXME: there's no good way to do this, at least without NEON:
+ // + There's no single move instruction for q-registers
+ // + We can't create a spill slot and use normal STR/LDR because stack
+ // allocation has already happened
+ // + We can't go via X-registers with FMOV because register allocation has
+ // already happened.
+ // This may not be efficient, but at least it works.
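+  // In effect, the sequence emitted below is:
+  //   str qSrc, [xsp, #-16]!
+  //   ldr qDst, [xsp], #16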
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ } else {
+ llvm_unreachable("Unknown register class in copyPhysReg");
+ }
+
+ // E.g. ORR xDst, xzr, xSrc, lsl #0
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(ZeroReg)
+ .addReg(SrcReg)
+ .addImm(0);
+}
+
+MachineInstr *
+AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Does the Opcode represent a conditional branch that we can remove and re-add
+/// at the end of a basic block?
+static bool isCondBranch(unsigned Opc) {
+ return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
+ Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
+ Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
+ Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
+}
+
+/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
+/// setting TBB to the destination basic block and populating the Cond vector
+/// with data necessary to recreate the conditional branch at a later
+/// date. First element will be the opcode, and subsequent ones define the
+/// conditions being branched on in an instruction-specific manner.
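+/// For example, "cbz w0, dest" yields Cond == {CBZw, w0} with TBB == dest,
+/// while "tbz x1, #3, dest" yields Cond == {TBZxii, x1, 3}.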
+static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ switch(I->getOpcode()) {
+ case AArch64::Bcc:
+ case AArch64::CBZw:
+ case AArch64::CBZx:
+ case AArch64::CBNZw:
+ case AArch64::CBNZx:
+ // These instructions just have one predicate operand in position 0 (either
+ // a condition code or a register being compared).
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ TBB = I->getOperand(1).getMBB();
+ return;
+ case AArch64::TBZwii:
+ case AArch64::TBZxii:
+ case AArch64::TBNZwii:
+ case AArch64::TBNZxii:
+ // These have two predicate operands: a register and a bit position.
+ Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
+ Cond.push_back(I->getOperand(0));
+ Cond.push_back(I->getOperand(1));
+ TBB = I->getOperand(2).getMBB();
+ return;
+ default:
+ llvm_unreachable("Unknown conditional branch to classify");
+ }
+}
+
+
+bool
+AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == AArch64::Bimm) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranch(LastOpc)) {
+ classifyCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && LastOpc == AArch64::Bimm) {
+ while (SecondLastOpc == AArch64::Bimm) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a B and a Bcc, handle it.
+ if (LastOpc == AArch64::Bimm) {
+ if (SecondLastOpc == AArch64::Bcc) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(SecondLastOpc)) {
+ classifyCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+bool AArch64InstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ switch (Cond[0].getImm()) {
+ case AArch64::Bcc: {
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
+ CC = A64InvertCondCode(CC);
+ Cond[1].setImm(CC);
+ return false;
+ }
+ case AArch64::CBZw:
+ Cond[0].setImm(AArch64::CBNZw);
+ return false;
+ case AArch64::CBZx:
+ Cond[0].setImm(AArch64::CBNZx);
+ return false;
+ case AArch64::CBNZw:
+ Cond[0].setImm(AArch64::CBZw);
+ return false;
+ case AArch64::CBNZx:
+ Cond[0].setImm(AArch64::CBZx);
+ return false;
+ case AArch64::TBZwii:
+ Cond[0].setImm(AArch64::TBNZwii);
+ return false;
+ case AArch64::TBZxii:
+ Cond[0].setImm(AArch64::TBNZxii);
+ return false;
+ case AArch64::TBNZwii:
+ Cond[0].setImm(AArch64::TBZwii);
+ return false;
+ case AArch64::TBNZxii:
+ Cond[0].setImm(AArch64::TBZxii);
+ return false;
+ default:
+ llvm_unreachable("Unknown branch type");
+ }
+}
+
+
+unsigned
+AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ if (FBB == 0 && Cond.empty()) {
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
+ return 1;
+ } else if (FBB == 0) {
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+ return 1;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ for (int i = 1, e = Cond.size(); i != e; ++i)
+ MIB.addOperand(Cond[i]);
+ MIB.addMBB(TBB);
+
+ BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
+ return 2;
+}
+
+unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool
+AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::TLSDESC_BLRx: {
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
+ .addOperand(MI.getOperand(1));
+ MI.setDesc(get(AArch64::BLRx));
+
+ llvm::finalizeBundle(MBB, NewMI, *++MBBI);
+ return true;
+ }
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+void
+AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned StoreOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: StoreOp = AArch64::LS32_STR; break;
+ case 8: StoreOp = AArch64::LS64_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
+ RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: StoreOp = AArch64::LSFP32_STR; break;
+ case 8: StoreOp = AArch64::LSFP64_STR; break;
+ case 16: StoreOp = AArch64::LSFP128_STR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
+ NewMI.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+
+}
+
+void
+AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
+
+ MachineMemOperand *MMO
+ = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ Align);
+
+ unsigned LoadOp = 0;
+ if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
+ switch(RC->getSize()) {
+ case 4: LoadOp = AArch64::LS32_LDR; break;
+ case 8: LoadOp = AArch64::LS64_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ } else {
+ assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64)
+ || RC->hasType(MVT::f128))
+ && "Expected integer or floating type for store");
+ switch (RC->getSize()) {
+ case 4: LoadOp = AArch64::LSFP32_LDR; break;
+ case 8: LoadOp = AArch64::LSFP64_LDR; break;
+ case 16: LoadOp = AArch64::LSFP128_LDR; break;
+ default:
+ llvm_unreachable("Unknown size for regclass");
+ }
+ }
+
+ MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
+ NewMI.addFrameIndex(FrameIdx)
+ .addImm(0)
+ .addMemOperand(MMO);
+}
+
+unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
+ unsigned Limit = (1 << 16) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
+ // is the largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ Limit = std::min(Limit, 0xfffu);
+ break;
+ }
+
+ int AccessScale, MinOffset, MaxOffset;
+ getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
+ Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
+ int &AccessScale, int &MinOffset,
+ int &MaxOffset) const {
+ switch (MI.getOpcode()) {
+  default: llvm_unreachable("Unknown load/store kind");
+ case TargetOpcode::DBG_VALUE:
+ AccessScale = 1;
+ MinOffset = INT_MIN;
+ MaxOffset = INT_MAX;
+ return;
+ case AArch64::LS8_LDR: case AArch64::LS8_STR:
+ case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
+ case AArch64::LDRSBw:
+ case AArch64::LDRSBx:
+ AccessScale = 1;
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ return;
+ case AArch64::LS16_LDR: case AArch64::LS16_STR:
+ case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
+ case AArch64::LDRSHw:
+ case AArch64::LDRSHx:
+ AccessScale = 2;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS32_LDR: case AArch64::LS32_STR:
+ case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
+ case AArch64::LDRSWx:
+ case AArch64::LDPSWx:
+ AccessScale = 4;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LS64_LDR: case AArch64::LS64_STR:
+ case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
+ case AArch64::PRFM:
+ AccessScale = 8;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
+ AccessScale = 16;
+ MinOffset = 0;
+ MaxOffset = 0xfff * AccessScale;
+ return;
+ case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
+ case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
+ AccessScale = 4;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
+ case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
+ AccessScale = 8;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
+ AccessScale = 16;
+ MinOffset = -0x40 * AccessScale;
+ MaxOffset = 0x3f * AccessScale;
+ return;
+ }
+}
+
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ const MachineBasicBlock &MBB = *MI.getParent();
+ const MachineFunction &MF = *MBB.getParent();
+ const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+
+ if (MCID.getSize())
+ return MCID.getSize();
+
+ if (MI.getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+
+ if (MI.isLabel())
+ return 0;
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case AArch64::TLSDESCCALL:
+ return 0;
+ default:
+ llvm_unreachable("Unknown instruction class");
+ }
+}
+
+unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
+bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MFI.getObjectOffset(FrameRegIdx);
+ llvm_unreachable("Unimplemented rewriteFrameIndex");
+}
+
+void llvm::emitRegUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
+ if (NumBytes == 0 && DstReg == SrcReg)
+ return;
+ else if (abs(NumBytes) & ~0xffffff) {
+    // Generically, we have to materialize the offset into a temporary register
+    // and add or subtract it. There are a couple of ways this could be done;
+    // for now we'll use a movz/movk sequence.
+ uint64_t Bits = static_cast<uint64_t>(abs(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
+ .addImm(0xffff & Bits).addImm(0)
+ .setMIFlags(MIFlags);
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(1)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(2)
+ .setMIFlags(MIFlags);
+ }
+
+ Bits >>= 16;
+ if (Bits & 0xffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0xffff & Bits).addImm(3)
+ .setMIFlags(MIFlags);
+ }
+
+  // ADD/SUB DST, SRC, xTMP (, lsl #0)
+ unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
+ BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(0)
+ .setMIFlag(MIFlags);
+ return;
+ }
+
+ // Now we know that the adjustment can be done in at most two add/sub
+ // (immediate) instructions, which is always more efficient than a
+  // literal-pool load, or even a hypothetical movz/movk/add sequence.
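+  // For example, NumBytes == 0x12345 is emitted as ADD Dst, Src, #0x345
+  // followed by ADD Dst, Dst, #0x12, lsl #12.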
+
+ // Decide whether we're doing addition or subtraction
+ unsigned LowOp, HighOp;
+ if (NumBytes >= 0) {
+ LowOp = AArch64::ADDxxi_lsl0_s;
+ HighOp = AArch64::ADDxxi_lsl12_s;
+ } else {
+ LowOp = AArch64::SUBxxi_lsl0_s;
+ HighOp = AArch64::SUBxxi_lsl12_s;
+ NumBytes = abs(NumBytes);
+ }
+
+ // If we're here, at the very least a move needs to be produced, which just
+ // happens to be materializable by an ADD.
+ if ((NumBytes & 0xfff) || NumBytes == 0) {
+ BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes & 0xfff)
+ .setMIFlag(MIFlags);
+
+ // Next update should use the register we've just defined.
+ SrcReg = DstReg;
+ }
+
+ if (NumBytes & 0xfff000) {
+ BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(NumBytes >> 12)
+ .setMIFlag(MIFlags);
+ }
+}
+
+void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags) {
+ emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
+ NumBytes, MIFlags);
+}
+
+
+namespace {
+ struct LDTLSCleanup : public MachineFunctionPass {
+ static char ID;
+ LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ AArch64MachineFunctionInfo* MFI
+ = MF.getInfo<AArch64MachineFunctionInfo>();
+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
+        // No point folding accesses if there aren't at least two.
+ return false;
+ }
+
+ MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+ return VisitNode(DT->getRootNode(), 0);
+ }
+
+ // Visit the dominator subtree rooted at Node in pre-order.
+ // If TLSBaseAddrReg is non-null, then use that to replace any
+ // TLS_base_addr instructions. Otherwise, create the register
+ // when the first such instruction is seen, and then use it
+ // as we encounter more instructions.
+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+ MachineBasicBlock *BB = Node->getBlock();
+ bool Changed = false;
+
+ // Traverse the current block.
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ ++I) {
+ switch (I->getOpcode()) {
+ case AArch64::TLSDESC_BLRx:
+ // Make sure it's a local dynamic access.
+ if (!I->getOperand(1).isSymbol() ||
+ strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ break;
+
+ if (TLSBaseAddrReg)
+ I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+ else
+ I = SetRegister(I, &TLSBaseAddrReg);
+ Changed = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Visit the children of this block in the dominator tree.
+ for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
+ I != E; ++I) {
+ Changed |= VisitNode(*I, TLSBaseAddrReg);
+ }
+
+ return Changed;
+ }
+
+ // Replace the TLS_base_addr instruction I with a copy from
+ // TLSBaseAddrReg, returning the new instruction.
+ MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
+ unsigned TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+ // code sequence assumes the address will be.
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0)
+ .addReg(TLSBaseAddrReg);
+
+ // Erase the TLS_base_addr instruction.
+ I->eraseFromParent();
+
+ return Copy;
+ }
+
+    // Create a virtual register in *TLSBaseAddrReg, and populate it by
+ // inserting a copy instruction after I. Returns the new instruction.
+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+ MachineFunction *MF = I->getParent()->getParent();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+ // Create a virtual register for the TLS base address.
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ // Insert a copy from X0 to TLSBaseAddrReg for later.
+ MachineInstr *Next = I->getNextNode();
+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
+
+ return Copy;
+ }
+
+ virtual const char *getPassName() const {
+ return "Local Dynamic TLS Access Clean-up";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass*
+llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
new file mode 100644
index 0000000..22a2ab4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -0,0 +1,112 @@
+//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
+#define LLVM_TARGET_AARCH64INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AArch64RegisterInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "AArch64GenInstrInfo.inc"
+
+namespace llvm {
+
+class AArch64Subtarget;
+
+class AArch64InstrInfo : public AArch64GenInstrInfo {
+ const AArch64RegisterInfo RI;
+ const AArch64Subtarget &Subtarget;
+public:
+  explicit AArch64InstrInfo(const AArch64Subtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ const AArch64Subtarget &getSubTarget() const { return Subtarget; }
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+ /// Look through the instructions in this function and work out the largest
+ /// the stack frame can be while maintaining the ability to address local
+ /// slots with no complexities.
+ unsigned estimateRSStackLimit(MachineFunction &MF) const;
+
+ /// getAddressConstraints - For loads and stores (and PRFMs) taking an
+ /// immediate offset, this function determines the constraints required for
+ /// the immediate. It must satisfy:
+ /// + MinOffset <= imm <= MaxOffset
+  /// + imm % AccessScale == 0
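+  /// For example, LS64_LDR and LS64_STR use AccessScale == 8, giving an
+  /// offset range of [0, 0xfff * 8].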
+ void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
+ int &MinOffset, int &MaxOffset) const;
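As a hypothetical illustration (not part of this patch), a caller such as a frame-index rewriter could apply these constraints as follows; the helper name is invented and only the getAddressConstraints signature above is taken from the header:

// Hypothetical helper: can this byte offset be folded into MI's immediate?
static bool offsetFitsImmediate(const AArch64InstrInfo &TII,
                                const MachineInstr &MI, int Offset) {
  int AccessScale, MinOffset, MaxOffset;
  TII.getAddressConstraints(MI, AccessScale, MinOffset, MaxOffset);
  return Offset % AccessScale == 0 &&
         Offset >= MinOffset && Offset <= MaxOffset;
}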
+
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
+
+};
+
+bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo &TII);
+
+
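+/// emitRegUpdate - Emit instructions to compute DstReg = SrcReg + NumBytes,
+/// using ScratchReg as a temporary where needed.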
+void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
+ int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
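+/// emitSPUpdate - Emit instructions to adjust the stack pointer by NumBytes,
+/// using ScratchReg as a temporary where needed.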
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc dl, const TargetInstrInfo &TII,
+ unsigned ScratchReg, int64_t NumBytes,
+ MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
new file mode 100644
index 0000000..562a7f6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -0,0 +1,5109 @@
+//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the AArch64 scalar instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Target-specific ISD nodes and profiles
+//===----------------------------------------------------------------------===//
+
+def SDT_A64ret : SDTypeProfile<0, 0, []>;
+def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
+ SDNPOptInGlue,
+ SDNPVariadic]>;
+
+// (ins NZCV, Condition, Dest)
+def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
+def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
+
+// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
+def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>]>;
+def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
+
+// (outs NZCV), (ins LHS, RHS, Condition)
+def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>]>;
+def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
+
+
+// (outs GPR64), (ins)
+def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+// A64 compares don't care about the cond really (they set all flags) so a
+// simple binary operator is useful.
+def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, node:$rhs, cond)>;
+
+
+// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
+// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
+// and V flags can be set differently by this operation. It comes down to
+// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
+// then everything is fine. If not then the optimization is wrong. Thus general
+// comparisons are only valid if op2 != 0.
+
+// So, finally, the only LLVM-native comparisons that don't mention C and V are
+// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
+// absence of information about op2.
+def equality_cond : PatLeaf<(cond), [{
+ return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
+}]>;
+
+def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
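The op2 == 0 case already demonstrates the point above. The following standalone program (an illustration added for this discussion, not code from the patch) models the architectural 32-bit AddWithCarry flag computation and checks that CMP and CMN agree on Z but not on C, which is why only SETEQ/SETNE may use CMN:

#include <cassert>
#include <cstdint>

struct NZCVFlags { bool N, Z, C, V; };

// AArch64 AddWithCarry() for 32-bit operands: compute the four flags.
static NZCVFlags addWithCarry(uint32_t x, uint32_t y, bool carryIn) {
  uint64_t usum = (uint64_t)x + y + (carryIn ? 1 : 0);
  int64_t ssum = (int64_t)(int32_t)x + (int32_t)y + (carryIn ? 1 : 0);
  uint32_t res = (uint32_t)usum;
  NZCVFlags f;
  f.N = (int32_t)res < 0;
  f.Z = res == 0;
  f.C = usum != res;            // unsigned carry out
  f.V = ssum != (int32_t)res;   // signed overflow
  return f;
}

int main() {
  const uint32_t vals[] = {0, 1, 42, 0x7fffffffu, 0x80000000u};
  for (uint32_t a : vals) {
    NZCVFlags cmp = addWithCarry(a, ~0u, true); // CMP a, #0  is  a + ~0 + 1
    NZCVFlags cmn = addWithCarry(a, 0u, false); // CMN a, #0  is  a + 0
    assert(cmp.Z == cmn.Z); // EQ/NE would agree...
    assert(cmp.C != cmn.C); // ...but conditions reading C (HS/LO) would not.
  }
  return 0;
}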
+
+// There are two layers of indirection here, driven by the following
+// considerations.
+// + TableGen does not know CodeModel or Reloc so that decision should be
+// made for a variable/address at ISelLowering.
+// + The output of ISelLowering should be selectable (hence the Wrapper,
+// rather than a bare target opcode)
+def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+
+
+def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
+ [SDNPHasChain]>;
+
+
+// (A64BFI LHS, RHS, LSB, Width)
+def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>,
+ SDTCisVT<4, i64>]>;
+
+def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
+
+// (A64EXTR HiReg, LoReg, LSB)
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
+def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+// (A64[SU]BFX Field, ImmR, ImmS).
+//
+// Note that ImmR and ImmS are already encoded for the actual instructions. The
+// more natural LSB and Width mix together to form ImmR and ImmS, something
+// which TableGen can't handle.
+def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
+def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
+
+def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
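Concretely, for the [SU]BFX forms the architectural mapping is ImmR = lsb and ImmS = lsb + width - 1. The small sketch below is an illustration only; the helper name is invented and the real conversion happens during ISelLowering, outside this file:

#include <cassert>

struct BFXFields { unsigned ImmR, ImmS; };

// Mix the natural (lsb, width) pair into the ImmR/ImmS fields used by SBFX/UBFX.
static BFXFields encodeBFX(unsigned Lsb, unsigned Width) {
  assert(Width >= 1 && "field must be at least one bit wide");
  return BFXFields{Lsb, Lsb + Width - 1};
}

int main() {
  // "ubfx w0, w1, #8, #4" extracts bits [11:8]: ImmR == 8, ImmS == 11.
  BFXFields F = encodeBFX(8, 4);
  assert(F.ImmR == 8 && F.ImmS == 11);
  return 0;
}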
+
+//===----------------------------------------------------------------------===//
+// Call sequence pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+
+def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
+// destination but needs a relocation against a fixed symbol. As such it has two
+// certain operands: the callee and the relocated variable.
+//
+// The TLS ABI only allows it to be selected to a BLR instruction (with
+// appropriate relocation).
+def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+
+def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+
+def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+
+// These pseudo-instructions have special semantics by virtue of being passed to
+// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
+// LowerCall to (in our case) tell the back-end about stack adjustments for
+// arguments passed on the stack. Here we select those markers to
+// pseudo-instructions which explicitly set the stack, and finally in the
+// RegisterInfo we convert them to a true stack adjustment.
+let Defs = [XSP], Uses = [XSP] in {
+ def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+
+ def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic operation pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+multiclass AtomicSizes<string opname> {
+ def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
+ def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_16") GPR64:$ptr, GPR32:$incr))]>;
+ def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
+ [(set GPR32:$dst, (!cast<SDNode>(opname # "_32") GPR64:$ptr, GPR32:$incr))]>;
+ def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
+ [(set GPR64:$dst, (!cast<SDNode>(opname # "_64") GPR64:$ptr, GPR64:$incr))]>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
+defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
+defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
+defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
+defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
+defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
+defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+
+let usesCustomInserter = 1, Defs = [NZCV] in {
+def ATOMIC_CMP_SWAP_I8
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_8 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I16
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_16 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I32
+ : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
+ [(set GPR32:$dst,
+ (atomic_cmp_swap_32 GPR64:$ptr, GPR32:$old, GPR32:$new))]>;
+def ATOMIC_CMP_SWAP_I64
+ : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
+ [(set GPR64:$dst,
+ (atomic_cmp_swap_64 GPR64:$ptr, GPR64:$old, GPR64:$new))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (extended register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
+
+// The RHS of these operations is conceptually a sign/zero-extended
+// register, optionally shifted left by 1-4. The extension can be a
+// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
+// must be specified with one exception:
+
+// If one of the registers is sp/wsp then LSL is an alias for UXTW in
+// 32-bit instructions and UXTX in 64-bit versions, the shift amount
+// is not optional in that case (but can explicitly be 0), and the
+// entire suffix can be skipped (e.g. "add sp, x3, x2").
+
+multiclass extend_operands<string PREFIX, string Diag> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX;
+ let RenderMethod = "addRegExtendOperands";
+ let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
+ let DiagnosticType = "AddSubRegExtend" # Diag;
+ }
+
+ def _operand : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
+ let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
+ let DecoderMethod = "DecodeRegExtendOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
+ }
+}
+
+defm UXTB : extend_operands<"UXTB", "Small">;
+defm UXTH : extend_operands<"UXTH", "Small">;
+defm UXTW : extend_operands<"UXTW", "Small">;
+defm UXTX : extend_operands<"UXTX", "Large">;
+defm SXTB : extend_operands<"SXTB", "Small">;
+defm SXTH : extend_operands<"SXTH", "Small">;
+defm SXTW : extend_operands<"SXTW", "Small">;
+defm SXTX : extend_operands<"SXTX", "Large">;
+
+def LSL_extasmoperand : AsmOperandClass {
+ let Name = "RegExtendLSL";
+ let RenderMethod = "addRegExtendOperands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+def LSL_extoperand : Operand<i64> {
+ let ParserMatchClass = LSL_extasmoperand;
+}
+
+
+// The patterns for various sign-extensions are a little ugly and
+// non-uniform because everything has already been promoted to the
+// legal i64 and i32 types. We'll wrap the various variants up in a
+// class for use later.
+class extend_types {
+ dag uxtb; dag uxth; dag uxtw; dag uxtx;
+ dag sxtb; dag sxth; dag sxtw; dag sxtx;
+}
+
+def extends_to_i64 : extend_types {
+ let uxtb = (and (anyext GPR32:$Rm), 255);
+ let uxth = (and (anyext GPR32:$Rm), 65535);
+ let uxtw = (zext GPR32:$Rm);
+ let uxtx = (i64 GPR64:$Rm);
+
+ let sxtb = (sext_inreg (anyext GPR32:$Rm), i8);
+ let sxth = (sext_inreg (anyext GPR32:$Rm), i16);
+ let sxtw = (sext GPR32:$Rm);
+ let sxtx = (i64 GPR64:$Rm);
+}
+
+
+def extends_to_i32 : extend_types {
+ let uxtb = (and GPR32:$Rm, 255);
+ let uxth = (and GPR32:$Rm, 65535);
+ let uxtw = (i32 GPR32:$Rm);
+ let uxtx = (i32 GPR32:$Rm);
+
+ let sxtb = (sext_inreg GPR32:$Rm, i8);
+ let sxth = (sext_inreg GPR32:$Rm, i16);
+ let sxtw = (i32 GPR32:$Rm);
+ let sxtx = (i32 GPR32:$Rm);
+}
+
+// Now, six of the extensions supported are easy and uniform: if the source size
+// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate
+// those instructions in one block.
+
+// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
+// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
+// be impossible.
+// + Patterns are very different as well.
+// + Passing different registers would be ugly (more fields in extend_types
+// would probably be the best option).
+multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
+ SDPatternOperator opfrag,
+ dag outs, extend_types exts, RegisterClass GPRsp> {
+ def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
+ NoItinerary>;
+
+ def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
+ NoItinerary>;
+ def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
+ outs,
+ (ins GPRsp:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPRsp:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
+ NoItinerary>;
+}
+
+// These two could be merged in with the above, but their patterns aren't really
+// necessary and the naming-scheme would necessarily break:
+multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
+ dag outs> {
+ def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [(opfrag GPR64xsp:$Rn, (shl GPR64:$Rm, UXTX_operand:$Imm3))],
+ NoItinerary>;
+
+ def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: same as uxtx */],
+ NoItinerary>;
+}
+
+multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
+ def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No pattern: probably same as uxtw */],
+ NoItinerary>;
+
+ def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
+ outs,
+ (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
+ !strconcat(asmop, "$Rn, $Rm, $Imm3"),
+ [/* No Pattern: probably same as uxtw */],
+ NoItinerary>;
+}
+
+class SetRD<RegisterClass RC, SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
+class SetNZCV<SDPatternOperator op>
+ : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
+
+defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
+ (outs GPR64xsp:$Rd)>;
+defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
+ (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
+ (outs GPR64xsp:$Rd)>;
+defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
+ (outs GPR32wsp:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
+ (outs GPR32wsp:$Rd)>;
+
+let Defs = [NZCV] in {
+defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
+ (outs GPR64:$Rd)>;
+defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
+ (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
+ (outs GPR32:$Rd)>;
+defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
+ (outs GPR64:$Rd)>;
+defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
+ (outs GPR32:$Rd), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
+ (outs GPR32:$Rd)>;
+
+
+let Rd = 0b11111, isCompare = 1 in {
+defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
+defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
+ (outs), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
+defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i64, GPR64xsp>,
+ addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
+defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
+ (outs), extends_to_i32, GPR32wsp>,
+ addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
+}
+}
+
+// Now the patterns for the case where no shift is needed. No patterns are
+// created for uxtx/sxtx since they're non-uniform and it's expected that
+// add/sub (shifted register) will handle those cases anyway.
+multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
+ RegisterClass GPRsp, extend_types exts> {
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxtb),
+ (!cast<Instruction>(prefix # "w_uxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxth),
+ (!cast<Instruction>(prefix # "w_uxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.uxtw),
+ (!cast<Instruction>(prefix # "w_uxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
+
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxtb),
+ (!cast<Instruction>(prefix # "w_sxtb") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxth),
+ (!cast<Instruction>(prefix # "w_sxth") GPRsp:$Rn, GPR32:$Rm, 0)>;
+ def : Pat<(nodeop GPRsp:$Rn, exts.sxtw),
+ (!cast<Instruction>(prefix # "w_sxtw") GPRsp:$Rn, GPR32:$Rm, 0)>;
+}
+
+defm : addsubext_noshift_patterns<"ADDxx", add, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"ADDww", add, GPR32wsp, extends_to_i32>;
+defm : addsubext_noshift_patterns<"SUBxx", sub, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"SUBww", sub, GPR32wsp, extends_to_i32>;
+
+defm : addsubext_noshift_patterns<"CMNx", A64cmn, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMNw", A64cmn, GPR32wsp, extends_to_i32>;
+defm : addsubext_noshift_patterns<"CMPx", A64cmp, GPR64xsp, extends_to_i64>;
+defm : addsubext_noshift_patterns<"CMPw", A64cmp, GPR32wsp, extends_to_i32>;
+
+// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
+// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
+// operation. Also permitted in this case is complete omission of the argument,
+// which implies "lsl #0".
+multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
+ (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+
+}
+
+defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
+defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
+defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
+
+// Rd cannot be sp for flag-setting variants so only half of the aliases are
+// needed.
+defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
+defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
+defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
+
+// CMP unfortunately has to be different because the instruction doesn't have a
+// dest register.
+multiclass cmp_lsl_aliases<string asmop, Instruction inst,
+ RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
+
+ def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
+ (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
+}
+
+defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
+defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
+defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
+
+// These instructions accept a 12-bit unsigned immediate, optionally shifted
+// left by 12 bits. The official assembly format specifies a 12-bit immediate with
+// one of "", "LSL #0", "LSL #12" supplementary operands.
+
+// There are surprisingly few ways to make this work with TableGen, so this
+// implementation has separate instructions for the "LSL #0" and "LSL #12"
+// variants.
+
+// If the MCInst retained a single combined immediate (which could be 0x123000,
+// for example) then both components (imm & shift) would have to be delegated to
+// a single assembly operand. This would entail a separate operand parser
+// (because the LSL would have to live in the same AArch64Operand as the
+// immediate to be accessible); such assembly-parsing code tends to be rather
+// complex and error-prone C++.
+//
+// By splitting the immediate, we can delegate handling this optional operand to
+// an InstAlias. Supporting functions to generate the correct MCInst are still
+// required, but these are essentially trivial and parsing can remain generic.
+//
+// Rejected plans with rationale:
+// ------------------------------
+//
+// In an ideal world you'd have two first-class immediate operands (in
+// InOperandList, specifying imm12 and shift). Unfortunately this is not
+// selectable by any means I could discover.
+//
+// An Instruction with two MCOperands hidden behind a single entry in
+// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
+// but required more C++ code to handle encoding/decoding. Parsing (the intended
+// main beneficiary) ended up equally complex because of the optional nature of
+// "LSL #0".
+//
+// Attempting to circumvent the need for a custom OperandParser above by giving
+// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
+// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
+// should be parsed: there was no way to accommodate an "lsl #12".
+
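A minimal sketch of the resulting legality check and split, matching the ImmLeaf predicates defined further down (the helper itself is hypothetical and not part of this patch; only the 0xfff / 0xfff000 masks and the shift-by-12 transform come from this file):

#include <cassert>
#include <cstdint>

// Split Value into the (imm12, shift) pair used by ADD/SUB (immediate), or
// report that it cannot be encoded by a single instruction.
static bool splitAddSubImm(uint64_t Value, unsigned &Imm12, unsigned &Shift) {
  if ((Value & ~0xfffULL) == 0) {
    Imm12 = (unsigned)Value;
    Shift = 0;                      // "LSL #0" form
    return true;
  }
  if ((Value & ~0xfff000ULL) == 0) {
    Imm12 = (unsigned)(Value >> 12);
    Shift = 12;                     // "LSL #12" form
    return true;
  }
  return false;
}

int main() {
  unsigned Imm12, Shift;
  assert(splitAddSubImm(0x123000, Imm12, Shift) && Imm12 == 0x123 && Shift == 12);
  assert(splitAddSubImm(0xabc, Imm12, Shift) && Shift == 0);
  assert(!splitAddSubImm(0x123456, Imm12, Shift));
  return 0;
}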
+let ParserMethod = "ParseImmWithLSLOperand",
+ RenderMethod = "addImmWithLSLOperands" in {
+ // Derived PredicateMethod fields are different for each
+ def addsubimm_lsl0_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL0";
+    // If an error is reported against this operand, the instruction could also
+    // be a register variant.
+ let DiagnosticType = "AddSubSecondSource";
+ }
+
+ def addsubimm_lsl12_asmoperand : AsmOperandClass {
+ let Name = "AddSubImmLSL12";
+ let DiagnosticType = "AddSubSecondSource";
+ }
+}
+
+def shr_12_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
+}]>;
+
+def shr_12_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
+}]>;
+
+def neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
+}]>;
+
+
+multiclass addsub_imm_operands<ValueType ty> {
+ let PrintMethod = "printAddSubImmLSL0Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl0_asmoperand in {
+ def _posimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
+ def _negimm_lsl0 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
+ neg_XFORM>;
+ }
+
+ let PrintMethod = "printAddSubImmLSL12Operand",
+ EncoderMethod = "getAddSubImmOpValue",
+ ParserMatchClass = addsubimm_lsl12_asmoperand in {
+ def _posimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
+ shr_12_XFORM>;
+
+ def _negimm_lsl12 : Operand<ty>,
+ ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
+ shr_12_neg_XFORM>;
+ }
+}
+
+// The add operands don't need any transformation
+defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
+defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
+
+multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
+ string asmop, string cmpasmop,
+ Operand imm_operand, Operand cmp_imm_operand,
+ RegisterClass GPR, RegisterClass GPRsp,
+ AArch64Reg ZR> {
+ // All registers for non-S variants allow SP
+ def _s : A64I_addsubimm<sf, op, 0b0, shift,
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
+ [(set GPRsp:$Rd,
+ (add GPRsp:$Rn, imm_operand:$Imm12))],
+ NoItinerary>;
+
+
+ // S variants can read SP but would write to ZR
+ def _S : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs GPR:$Rd),
+ (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
+ [(set GPR:$Rd, (addc GPRsp:$Rn, imm_operand:$Imm12))],
+ NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+  // Note that the pattern here for ADDS is subtle. Canonically "CMP a, b"
+  // becomes "SUBS a, b". If b < 0 then this is equivalent to "ADDS a, (-b)"
+  // only in the sense discussed for CMN above: the result (and hence N and Z)
+  // matches, but C and V need not, so the equivalence does not hold in general.
+ def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
+ (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
+ !strconcat(cmpasmop, " $Rn, $Imm12"),
+ [(set NZCV,
+ (A64cmp GPRsp:$Rn, cmp_imm_operand:$Imm12))],
+ NoItinerary> {
+ let Rd = 0b11111;
+ let Defs = [NZCV];
+ let isCompare = 1;
+ }
+}
+
+
+multiclass addsubimm_shifts<string prefix, bit sf, bit op,
+ string asmop, string cmpasmop, string operand, string cmpoperand,
+ RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR> {
+ defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl0"),
+ !cast<Operand>(cmpoperand # "_lsl0"),
+ GPR, GPRsp, ZR>;
+
+ defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
+ asmop, cmpasmop,
+ !cast<Operand>(operand # "_lsl12"),
+ !cast<Operand>(cmpoperand # "_lsl12"),
+ GPR, GPRsp, ZR>;
+}
+
+defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
+ "addsubimm_operand_i32_posimm",
+ "addsubimm_operand_i32_negimm",
+ GPR32, GPR32wsp, WZR>;
+defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
+ "addsubimm_operand_i64_posimm",
+ "addsubimm_operand_i64_negimm",
+ GPR64, GPR64xsp, XZR>;
+defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
+ "addsubimm_operand_i32_negimm",
+ "addsubimm_operand_i32_posimm",
+ GPR32, GPR32wsp, WZR>;
+defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
+ "addsubimm_operand_i64_negimm",
+ "addsubimm_operand_i64_posimm",
+ GPR64, GPR64xsp, XZR>;
+
+multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
+ def _fromsp : InstAlias<"mov $Rd, $Rn",
+ (addop GPRsp:$Rd, SP:$Rn, 0),
+ 0b1>;
+
+ def _tosp : InstAlias<"mov $Rd, $Rn",
+ (addop SP:$Rd, GPRsp:$Rn, 0),
+ 0b1>;
+}
+
+// Recall Rxsp is a RegisterClass containing *just* xsp.
+defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
+defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
+
+//===-------------------------------
+// 1. The "shifted register" operands. Shared with logical insts.
+//===-------------------------------
+
+multiclass shift_operands<string prefix, string form> {
+ def _asmoperand_i32 : AsmOperandClass {
+ let Name = "Shift" # form # "i32";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", false>";
+ let DiagnosticType = "AddSubRegShift32";
+ }
+
+ // Note that the operand type is intentionally i64 because the DAGCombiner
+ // puts these into a canonical form.
+ def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ let DecoderMethod = "Decode32BitShiftOperand";
+ }
+
+ def _asmoperand_i64 : AsmOperandClass {
+ let Name = "Shift" # form # "i64";
+ let RenderMethod = "addShiftOperands";
+ let PredicateMethod = "isShift<A64SE::" # form # ", true>";
+ let DiagnosticType = "AddSubRegShift64";
+ }
+
+ def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
+ let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
+ }
+}
+
+defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
+defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
+defm asr_operand : shift_operands<"asr_operand", "ASR">;
+
+// Not used for add/sub, but defined here for completeness. The "logical
+// (shifted register)" instructions *do* have an ROR variant.
+defm ror_operand : shift_operands<"ror_operand", "ROR">;
+
+//===-------------------------------
+// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
+//===-------------------------------
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_addsubshift<sf, op, s, 0b00,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, s, 0b01,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, s, 0b10,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
+ commutable, asmop, opfrag, "i64", GPR64, defs>;
+ defm www : addsub_shifts<prefix # "www", 0b0, op, s,
+ commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
+defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+
+defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
+defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+
+//===-------------------------------
+// 3. The NEG/NEGS aliases
+//===-------------------------------
+
+multiclass neg_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop> {
+ def : InstAlias<"neg $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+ def : Pat<(sub 0, (shiftop GPR:$Rm, shift_operand:$Imm6)),
+ (INST ZR, GPR:$Rm, shift_operand:$Imm6)>;
+}
+
+defm : neg_alias<SUBwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+defm : neg_alias<SUBwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+defm : neg_alias<SUBwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def : Pat<(sub 0, GPR32:$Rm), (SUBwww_lsl WZR, GPR32:$Rm, 0)>;
+
+defm : neg_alias<SUBxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+defm : neg_alias<SUBxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+defm : neg_alias<SUBxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def : Pat<(sub 0, GPR64:$Rm), (SUBxxx_lsl XZR, GPR64:$Rm, 0)>;
+
+// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
+// be involved.
+class negs_alias<Instruction INST, RegisterClass GPR,
+ Register ZR, Operand shift_operand, SDNode shiftop>
+ : InstAlias<"negs $Rd, $Rm, $Imm6",
+ (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+
+def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
+def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
+def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
+def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
+def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
+def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+
+//===-------------------------------
+// 4. The CMP/CMN aliases
+//===-------------------------------
+
+multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR> {
+ let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
+ [(set NZCV, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, "i32", GPR32>;
+defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, "i64", GPR64>;
+
+defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, "i32", GPR32>;
+defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, "i64", GPR64>;
+
+//===----------------------------------------------------------------------===//
+// Add-subtract (with carry) instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
+
+multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
+ let Uses = [NZCV] in {
+ def www : A64I_addsubcarry<0b0, op, s, 0b000000,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+
+ def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [], NoItinerary>;
+ }
+}
+
+let isCommutable = 1 in {
+ defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
+}
+
+defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
+
+let Defs = [NZCV] in {
+ let isCommutable = 1 in {
+ defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
+ }
+
+ defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
+}
+
+def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
+def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+
+// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
+// addition). So the flag-setting instructions are appropriate.
+def : Pat<(adde GPR32:$Rn, GPR32:$Rm), (ADCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(adde GPR64:$Rn, GPR64:$Rm), (ADCSxxx GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(sube GPR32:$Rn, GPR32:$Rm), (SBCSwww GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(sube GPR64:$Rn, GPR64:$Rm), (SBCSxxx GPR64:$Rn, GPR64:$Rm)>;
+
+//===----------------------------------------------------------------------===//
+// Bitfield
+//===----------------------------------------------------------------------===//
+// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
+// UBFIZ, UBFX
+
+// Because of the rather complicated nearly-overlapping aliases, the decoding of
+// this range of instructions is handled manually. The architectural
+// instructions are BFM, SBFM and UBFM but a disassembler should never produce
+// these.
+//
+// In the end, the best option was to use BFM instructions for decoding under
+// almost all circumstances, but to create aliasing *Instructions* for each of
+// the canonical forms and specify a completely custom decoder which would
+// substitute the correct MCInst as needed.
+//
+// This also simplifies instruction selection, parsing etc because the MCInsts
+// have a shape that's closer to their use in code.
+
+//===-------------------------------
+// 1. The architectural BFM instructions
+//===-------------------------------
+
+def uimm5_asmoperand : AsmOperandClass {
+ let Name = "UImm5";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm5";
+}
+
+def uimm6_asmoperand : AsmOperandClass {
+ let Name = "UImm6";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm6";
+}
+
+def bitfield32_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+
+ let DecoderMethod = "DecodeBitfield32ImmOperand";
+}
+
+
+def bitfield64_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+
+ // Default decoder works in 64-bit case: the 6-bit field can take any value.
+}
+
+multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ }
+}
+
+defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
+defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
+
+// BFM instructions modify the destination register rather than defining it
+// completely.
+def BFMwwii :
+ A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+def BFMxxii :
+ A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
+ "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ let DecoderMethod = "DecodeBitfieldInstruction";
+ let Constraints = "$src = $Rd";
+}
+
+
+//===-------------------------------
+// 2. Extend aliases to 64-bit dest
+//===-------------------------------
+
+// Unfortunately the extensions that end up as 64-bits cannot be handled by an
+// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
+// to be mapped to "SBFM x0, x0, #0, #7" (changing the class of Rn). InstAlias
+// is not capable of such a mapping as far as I'm aware.
+
+// Note that these instructions are strictly more specific than the
+// BFM ones (in ImmR) so they can handle their own decoding.
+class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, string asmop,
+ bits<6> imms, dag pattern>
+ : A64I_bitfield<sf, opc, sf,
+ (outs GPRDest:$Rd), (ins GPR32:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [(set GPRDest:$Rd, pattern)], NoItinerary> {
+ let ImmR = 0b000000;
+ let ImmS = imms;
+}
+
+// Signed extensions
+def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtb", 7,
+ (sext_inreg (anyext GPR32:$Rn), i8)>;
+def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, "sxtb", 7,
+ (sext_inreg GPR32:$Rn, i8)>;
+def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxth", 15,
+ (sext_inreg (anyext GPR32:$Rn), i16)>;
+def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, "sxth", 15,
+ (sext_inreg GPR32:$Rn, i16)>;
+def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, "sxtw", 31, (sext GPR32:$Rn)>;
+
+// Unsigned extensions
+def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, "uxtb", 7,
+ (and GPR32:$Rn, 255)>;
+def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, "uxth", 15,
+ (and GPR32:$Rn, 65535)>;
+
+// The 64-bit unsigned variants are not strictly architectural but recommended
+// for consistency.
+let isAsmParserOnly = 1 in {
+ def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxtb", 7,
+ (and (anyext GPR32:$Rn), 255)>;
+ def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, "uxth", 15,
+ (and (anyext GPR32:$Rn), 65535)>;
+}
+
+// Extra patterns for when the source register is actually 64-bits
+// too. There's no architectural difference here; it's just LLVM
+// shenanigans. There's no need for equivalent zero-extension patterns
+// because they'll already be caught by logical (immediate) matching.
+def : Pat<(sext_inreg GPR64:$Rn, i8),
+ (SXTBxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i16),
+ (SXTHxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+def : Pat<(sext_inreg GPR64:$Rn, i32),
+ (SXTWxw (EXTRACT_SUBREG GPR64:$Rn, sub_32))>;
+
+
+//===-------------------------------
+// 3. Aliases for ASR and LSR (the simple shifts)
+//===-------------------------------
+
+// These also handle their own decoding because ImmS being set makes
+// them take precedence over BFM.
+multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_bitfield<0b0, opc, 0b0,
+ (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set GPR32:$Rd, (opnode GPR32:$Rn, bitfield32_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 31;
+ }
+
+ def xxi : A64I_bitfield<0b1, opc, 0b1,
+ (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
+ [(set GPR64:$Rd, (opnode GPR64:$Rn, bitfield64_imm:$ImmR))],
+ NoItinerary> {
+ let ImmS = 63;
+ }
+
+}
+
+defm ASR : A64I_shift<0b00, "asr", sra>;
+defm LSR : A64I_shift<0b10, "lsr", srl>;
+
+//===-------------------------------
+// 4. Aliases for LSL
+//===-------------------------------
+
+// Unfortunately LSL and subsequent aliases are much more complicated. We need
+// to be able to say that certain output instruction fields depend in a complex
+// manner on combinations of input assembly fields.
+//
+// MIOperandInfo *might* have been able to do it, but at the cost of
+// significantly more C++ code.
+
+// N.b. contrary to usual practice these operands store the shift rather than
+// the machine bits in an MCInst. The complexity overhead of consistency
+// outweighed the benefits in this case (custom asmparser, printer and selection
+// vs custom encoder).
+def bitfield32_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let ParserMatchClass = uimm5_asmoperand;
+ let EncoderMethod = "getBitfield32LSLOpValue";
+}
+
+def bitfield64_lsl_imm : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let ParserMatchClass = uimm6_asmoperand;
+ let EncoderMethod = "getBitfield64LSLOpValue";
+}
+
+class A64I_bitfield_lsl<bit sf, RegisterClass GPR, Operand operand>
+ : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
+ "lsl\t$Rd, $Rn, $FullImm",
+ [(set GPR:$Rd, (shl GPR:$Rn, operand:$FullImm))],
+ NoItinerary> {
+ bits<12> FullImm;
+ let ImmR = FullImm{5-0};
+ let ImmS = FullImm{11-6};
+
+ // No disassembler allowed because it would overlap with BFM which does the
+ // actual work.
+ let isAsmParserOnly = 1;
+}
+
+def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, bitfield32_lsl_imm>;
+def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, bitfield64_lsl_imm>;
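For reference, the architectural alias being built here is "lsl Rd, Rn, #sh" == "ubfm Rd, Rn, #((regsize - sh) % regsize), #(regsize - 1 - sh)". The sketch below is an assumption about what the getBitfield{32,64}LSLOpValue encoder methods compute, not code from this patch; it shows how the alias lines up with the FullImm{11-6}/FullImm{5-0} split above:

#include <cassert>

// Convert an LSL shift amount into the combined ImmS:ImmR field layout used by
// A64I_bitfield_lsl (ImmR in bits [5:0], ImmS in bits [11:6]).
static unsigned lslToFullImm(unsigned Shift, unsigned RegSize) {
  unsigned ImmR = (RegSize - Shift) % RegSize;
  unsigned ImmS = RegSize - 1 - Shift;
  return (ImmS << 6) | ImmR;
}

int main() {
  // "lsl w0, w1, #4" is "ubfm w0, w1, #28, #27".
  assert(lslToFullImm(4, 32) == ((27u << 6) | 28u));
  // "lsl x0, x1, #0" is "ubfm x0, x1, #0, #63" (a plain copy of the bits).
  assert(lslToFullImm(0, 64) == ((63u << 6) | 0u));
  return 0;
}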
+
+//===-------------------------------
+// 5. Aliases for bitfield extract instructions
+//===-------------------------------
+
+def bfx32_width_asmoperand : AsmOperandClass {
+ let Name = "BFX32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx32_width_asmoperand;
+}
+
+def bfx64_width_asmoperand : AsmOperandClass {
+ let Name = "BFX64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFXWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfx64_width : Operand<i64> {
+ let PrintMethod = "printBFXWidthOperand";
+ let ParserMatchClass = bfx64_width_asmoperand;
+}
+
+
+multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set GPR32:$Rd, (op GPR32:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [(set GPR64:$Rd, (op GPR64:$Rn, imm:$ImmR, imm:$ImmS))],
+ NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
+defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
+
+// Again, variants based on BFM modify Rd so need it as an input too.
+def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
+ "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+// SBFX instructions can do a 1-instruction sign-extension of boolean values.
+def : Pat<(sext_inreg GPR64:$Rn, i1), (SBFXxxii GPR64:$Rn, 0, 0)>;
+def : Pat<(sext_inreg GPR32:$Rn, i1), (SBFXwwii GPR32:$Rn, 0, 0)>;
+def : Pat<(i64 (sext_inreg (anyext GPR32:$Rn), i1)),
+ (SBFXxxii (SUBREG_TO_REG (i64 0), GPR32:$Rn, sub_32), 0, 0)>;
+
+// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
+// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
+def : Pat<(zext GPR32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii GPR32:$Rn, 0, 31),
+ sub_32)>;
+
+//===-------------------------------
+// 6. Aliases for bitfield insert instructions
+//===-------------------------------
+
+def bfi32_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI32LSB";
+ let PredicateMethod = "isUImm<5>";
+ let RenderMethod = "addBFILSBOperands<32>";
+ let DiagnosticType = "UImm5";
+}
+
+def bfi32_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
+ let PrintMethod = "printBFILSBOperand<32>";
+ let ParserMatchClass = bfi32_lsb_asmoperand;
+}
+
+def bfi64_lsb_asmoperand : AsmOperandClass {
+ let Name = "BFI64LSB";
+ let PredicateMethod = "isUImm<6>";
+ let RenderMethod = "addBFILSBOperands<64>";
+ let DiagnosticType = "UImm6";
+}
+
+def bfi64_lsb : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
+ let PrintMethod = "printBFILSBOperand<64>";
+ let ParserMatchClass = bfi64_lsb_asmoperand;
+}
+
+// Width verification is performed during conversion so the width operand can
+// be shared between the 32/64-bit cases. A custom print method is still needed
+// though, because the underlying instruction field encodes "width - 1" rather
+// than the width itself.
+def bfi32_width_asmoperand : AsmOperandClass {
+ let Name = "BFI32Width";
+ let PredicateMethod = "isBitfieldWidth<32>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width32";
+}
+
+def bfi32_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi32_width_asmoperand;
+}
+
+def bfi64_width_asmoperand : AsmOperandClass {
+ let Name = "BFI64Width";
+ let PredicateMethod = "isBitfieldWidth<64>";
+ let RenderMethod = "addBFIWidthOperands";
+ let DiagnosticType = "Width64";
+}
+
+def bfi64_width : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
+ let PrintMethod = "printBFIWidthOperand";
+ let ParserMatchClass = bfi64_width_asmoperand;
+}
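For reference, the alias being assembled here is "bfi Rd, Rn, #lsb, #width" == "bfm Rd, Rn, #((regsize - lsb) % regsize), #(width - 1)". The sketch below only illustrates that correspondence; it is an assumption about the conversion the addBFILSBOperands<N>/addBFIWidthOperands render methods and matching printers perform, not code from this patch:

#include <cassert>

struct BFIFields { unsigned ImmR, ImmS; };

// Convert the natural (lsb, width) pair of a BFI into the BFM ImmR/ImmS fields.
static BFIFields encodeBFI(unsigned Lsb, unsigned Width, unsigned RegSize) {
  return BFIFields{(RegSize - Lsb) % RegSize, Width - 1};
}

int main() {
  // "bfi w0, w1, #8, #4" is "bfm w0, w1, #24, #3".
  BFIFields F = encodeBFI(8, 4, 32);
  assert(F.ImmR == 24 && F.ImmS == 3);
  return 0;
}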
+
+multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
+ def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+
+ def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
+ [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ }
+}
+
+defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
+defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+
+
+def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
+ (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
+ (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
+ "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary> {
+ // As above, no disassembler allowed.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src = $Rd";
+}
+
+//===----------------------------------------------------------------------===//
+// Compare and branch (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: CBZ, CBNZ
+
+class label_asmoperand<int width, int scale> : AsmOperandClass {
+ let Name = "Label" # width # "_" # scale;
+ let PredicateMethod = "isLabel<" # width # "," # scale # ">";
+ let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
+ let DiagnosticType = "Label";
+}
+
+def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
+
+// All conditional immediate branches are the same really: 19 signed bits scaled
+// by the instruction-size (4).
+def bcc_target : Operand<OtherVT> {
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let ParserMatchClass = label_wid19_scal4_asmoperand;
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
+ let isBranch = 1, isTerminator = 1 in {
+ def x : A64I_cmpbr<0b1, op,
+ (outs),
+ (ins GPR64:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR64:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+
+ def w : A64I_cmpbr<0b0, op,
+ (outs),
+ (ins GPR32:$Rt, bcc_target:$Label),
+ !strconcat(asmop,"\t$Rt, $Label"),
+ [(A64br_cc (A64cmp GPR32:$Rt, 0), SETOP, bb:$Label)],
+ NoItinerary>;
+ }
+}
+
+defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
+ return Imm == A64CC::EQ;
+}]> >;
+defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
+ return Imm == A64CC::NE;
+}]> >;
+
+//===----------------------------------------------------------------------===//
+// Conditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B.cc
+
+def cond_code_asmoperand : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm <= 15;
+}]> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_asmoperand;
+}
+
+def Bcc : A64I_condbr<0b0, 0b0, (outs),
+ (ins cond_code:$Cond, bcc_target:$Label),
+ "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
+ NoItinerary> {
+ let Uses = [NZCV];
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+def uimm4_asmoperand : AsmOperandClass {
+ let Name = "UImm4";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm4";
+}
+
+def uimm4 : Operand<i32> {
+ let ParserMatchClass = uimm4_asmoperand;
+}
+
+def uimm5 : Operand<i32> {
+ let ParserMatchClass = uimm5_asmoperand;
+}
+
+// The only difference between this operand and the one for instructions like
+// B.cc is that it's parsed manually. The others get parsed implicitly as part of
+// the mnemonic handling.
+def cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "CondCodeOp";
+ let RenderMethod = "addCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def cond_code_op : Operand<i32> {
+ let PrintMethod = "printCondCodeOperand";
+ let ParserMatchClass = cond_code_op_asmoperand;
+}
+
+class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
+ (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
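+
+// As a usage example, "ccmp x1, #31, #4, ne" behaves as follows: if NE holds,
+// NZCV is set from the comparison "x1 - #31"; otherwise NZCV is loaded with
+// the literal #4 (0b0100, i.e. only Z set). CCMN is identical except that the
+// conditional comparison adds the immediate instead of subtracting it.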
+
+//===----------------------------------------------------------------------===//
+// Conditional compare (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CCMN, CCMP
+
+class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
+ : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
+def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
+def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
+def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+
+//===----------------------------------------------------------------------===//
+// Conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
+
+// Condition code which is encoded as the inversion (semantically rather than
+// bitwise) in the instruction.
+def inv_cond_code_op_asmoperand : AsmOperandClass {
+ let Name = "InvCondCodeOp";
+ let RenderMethod = "addInvCondCodeOperands";
+ let PredicateMethod = "isCondCode";
+ let ParserMethod = "ParseCondCodeOperand";
+ let DiagnosticType = "CondCode";
+}
+
+def inv_cond_code_op : Operand<i32> {
+ let ParserMatchClass = inv_cond_code_op_asmoperand;
+}
+
+// Having a separate operand for the selectable use-case is debatable, but gives
+// consistency with cond_code.
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
+ return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+}]>;
+
+def inv_cond_code
+ : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
+
+
+multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
+ SDPatternOperator select> {
+ let Uses = [NZCV] in {
+ def wwwc : A64I_condsel<0b0, op, 0b0, op2,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set GPR32:$Rd, (select GPR32:$Rn, GPR32:$Rm))],
+ NoItinerary>;
+
+
+ def xxxc : A64I_condsel<0b1, op, 0b0, op2,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
+ [(set GPR64:$Rd, (select GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+ }
+}
+
+def simple_select
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
+
+class complex_select<SDPatternOperator opnode>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
+
+
+defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
+defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
+ complex_select<PatFrag<(ops node:$val),
+ (add node:$val, 1)>>>;
+defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
+defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
+
+// Now the instruction aliases, which fit nicely into LLVM's model:
+
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cset $Rd, $Cond",
+ (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"csetm $Rd, $Cond",
+ (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinc $Rd, $Rn, $Cond",
+ (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cinv $Rd, $Rn, $Cond",
+ (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
+def : InstAlias<"cneg $Rd, $Rn, $Cond",
+ (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
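+
+// For example, "cset w0, eq" is accepted and emitted as
+// "csinc w0, wzr, wzr, ne": the inv_cond_code_op operand performs the
+// semantic inversion of the condition when parsing and printing the alias.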
+
+// Finally some helper patterns.
+
+// For CSET (a.k.a. zero-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
+ (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// For CSETM (a.k.a. sign-extension of icmp)
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
+
+def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
+def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
+ (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
+
+// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
+// commutativity. The instructions are too complex for isCommutable to be used,
+// so we have to create the patterns manually:
+
+// No commutable pattern for CSEL since the commuted version is isomorphic.
+
+// CSINC
+def :Pat<(A64select_cc NZCV, (add GPR32:$Rm, 1), GPR32:$Rn,
+ inv_cond_code:$Cond),
+ (CSINCwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (add GPR64:$Rm, 1), GPR64:$Rn,
+ inv_cond_code:$Cond),
+ (CSINCxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSINV
+def :Pat<(A64select_cc NZCV, (not GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSINVwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (not GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSINVxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+// CSNEG
+def :Pat<(A64select_cc NZCV, (ineg GPR32:$Rm), GPR32:$Rn, inv_cond_code:$Cond),
+ (CSNEGwwwc GPR32:$Rn, GPR32:$Rm, inv_cond_code:$Cond)>;
+def :Pat<(A64select_cc NZCV, (ineg GPR64:$Rm), GPR64:$Rn, inv_cond_code:$Cond),
+ (CSNEGxxxc GPR64:$Rn, GPR64:$Rm, inv_cond_code:$Cond)>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (1 source) instructions
+//===----------------------------------------------------------------------===//
+// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
+
+// Most of these operations have no simple one-to-one LLVM node, so the
+// multiclass below leaves the pattern list empty; the useful patterns
+// (ctlz, cttz, bswap) are attached separately afterwards.
+
+class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
+ list<dag> patterns, RegisterClass GPRrc,
+ InstrItinClass itin>:
+ A64I_dp_1src<sf,
+ 0,
+ 0b00000,
+ opcode,
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ (outs GPRrc:$Rd),
+ (ins GPRrc:$Rn),
+ patterns,
+ itin>;
+
+multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
+ let hasSideEffects = 0 in {
+ def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
+ def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
+ }
+}
+
+defm RBIT : A64I_dp_1src<0b000000, "rbit">;
+defm CLS : A64I_dp_1src<0b000101, "cls">;
+defm CLZ : A64I_dp_1src<0b000100, "clz">;
+
+def : Pat<(ctlz GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR32:$Rn), (CLZww GPR32:$Rn)>;
+def : Pat<(ctlz_zero_undef GPR64:$Rn), (CLZxx GPR64:$Rn)>;
+
+def : Pat<(cttz GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
+def : Pat<(cttz_zero_undef GPR32:$Rn), (CLZww (RBITww GPR32:$Rn))>;
+def : Pat<(cttz_zero_undef GPR64:$Rn), (CLZxx (RBITxx GPR64:$Rn))>;
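+
+// The cttz patterns rely on the identity cttz(x) == clz(rbit(x)): e.g. for
+// x = 0x00000008 (bit 3 set), rbit moves that bit to position 28 and clz then
+// counts the 3 leading zero bits, giving cttz(x) = 3.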
+
+
+def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
+ [(set GPR32:$Rd, (bswap GPR32:$Rn))],
+ GPR32, NoItinerary>;
+def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
+ [(set GPR64:$Rd, (bswap GPR64:$Rn))],
+ GPR64, NoItinerary>;
+def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
+ [(set GPR64:$Rd, (bswap (rotr GPR64:$Rn, (i64 32))))],
+ GPR64, NoItinerary>;
+def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
+ [(set GPR32:$Rd, (bswap (rotr GPR32:$Rn, (i64 16))))],
+ GPR32,
+ NoItinerary>;
+def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
+// LSR, ASR, ROR
+
+
+class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
+ RegisterClass GPRsp,
+ InstrItinClass itin>:
+ A64I_dp_2src<sf,
+ opcode,
+ 0,
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ (outs GPRsp:$Rd),
+ (ins GPRsp:$Rn, GPRsp:$Rm),
+ patterns,
+ itin>;
+
+multiclass dp_2src_crc<bit c, string asmop> {
+ def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
+ !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
+ def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
+ !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
+ def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
+ !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
+ def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
+ !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
+ (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
+ NoItinerary>;
+}
+
+multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set GPR32:$Rd,
+ (op GPR32:$Rn, (i64 (zext GPR32:$Rm))))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+
+multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
+ def www : dp_2src_impl<0b0,
+ opcode,
+ asmop,
+ [(set GPR32:$Rd, (op GPR32:$Rn, GPR32:$Rm))],
+ GPR32,
+ NoItinerary>;
+ def xxx : dp_2src_impl<0b1,
+ opcode,
+ asmop,
+ [(set GPR64:$Rd, (op GPR64:$Rn, GPR64:$Rm))],
+ GPR64,
+ NoItinerary>;
+}
+
+// Here we define the data processing 2 source instructions.
+defm CRC32 : dp_2src_crc<0b0, "crc32">;
+defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+
+defm UDIV : dp_2src<0b000010, "udiv", udiv>;
+defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
+
+defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
+defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
+defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
+defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
+
+// Extra patterns for an incoming 64-bit value for a 32-bit
+// operation. Since the LLVM operations are undefined (as in C) if the
+// RHS is out of range, it's perfectly permissible to discard the high
+// bits of the GPR64.
+def : Pat<(shl GPR32:$Rn, GPR64:$Rm),
+ (LSLVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(srl GPR32:$Rn, GPR64:$Rm),
+ (LSRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(sra GPR32:$Rn, GPR64:$Rm),
+ (ASRVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+def : Pat<(rotr GPR32:$Rn, GPR64:$Rm),
+ (RORVwww GPR32:$Rn, (EXTRACT_SUBREG GPR64:$Rm, sub_32))>;
+
+// Here we define the aliases for the data processing 2 source instructions.
+def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
+def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
+def ASR_mnemonic : MnemonicAlias<"asrv", "asr">;
+def ROR_mnemonic : MnemonicAlias<"rorv", "ror">;
+
+//===----------------------------------------------------------------------===//
+// Data Processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
+// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
+
+class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
+ RegisterClass SrcReg, string asmop, dag pattern>
+ : A64I_dp3<sf, opcode,
+ (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
+ [(set AccReg:$Rd, pattern)], NoItinerary> {
+ RegisterClass AccGPR = AccReg;
+ RegisterClass SrcGPR = SrcReg;
+}
+
+def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, GPR32, "madd",
+ (add GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, GPR64, "madd",
+ (add GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, GPR32, "msub",
+ (sub GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm))>;
+def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, GPR64, "msub",
+ (sub GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, GPR32, "smaddl",
+ (add GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, GPR32, "smsubl",
+ (sub GPR64:$Ra, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+
+def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, GPR32, "umaddl",
+ (add GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, GPR32, "umsubl",
+ (sub GPR64:$Ra, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
+
+let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
+ def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "umulh\t$Rd, $Rn, $Rm",
+ [(set GPR64:$Rd, (mulhu GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+
+ def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm),
+ "smulh\t$Rd, $Rn, $Rm",
+ [(set GPR64:$Rd, (mulhs GPR64:$Rn, GPR64:$Rm))],
+ NoItinerary>;
+}
+
+multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
+ Register ZR, dag pattern> {
+ def : InstAlias<asmop # " $Rd, $Rn, $Rm",
+ (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+
+ def : Pat<pattern, (INST INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+}
+
+defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul GPR32:$Rn, GPR32:$Rm)>;
+defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul GPR64:$Rn, GPR64:$Rm)>;
+
+defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
+ (sub 0, (mul GPR32:$Rn, GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
+ (sub 0, (mul GPR64:$Rn, GPR64:$Rm))>;
+
+defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
+ (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (sext GPR32:$Rn)), (sext GPR32:$Rm)))>;
+
+defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
+ (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm))>;
+defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
+ (sub 0, (mul (i64 (zext GPR32:$Rn)), (zext GPR32:$Rm)))>;
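+
+// For example, "mul w0, w1, w2" is accepted and encoded as
+// "madd w0, w1, w2, wzr", "mneg w0, w1, w2" as "msub w0, w1, w2, wzr", and
+// "smull x0, w1, w2" as "smaddl x0, w1, w2, xzr".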
+
+
+//===----------------------------------------------------------------------===//
+// Exception generation
+//===----------------------------------------------------------------------===//
+// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
+
+def uimm16_asmoperand : AsmOperandClass {
+ let Name = "UImm16";
+ let PredicateMethod = "isUImm<16>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm16";
+}
+
+def uimm16 : Operand<i32> {
+ let ParserMatchClass = uimm16_asmoperand;
+}
+
+class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
+ : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
+ !strconcat(asmop, "\t$UImm16"), [], NoItinerary> {
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
+def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
+def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
+def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
+def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
+def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+
+def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
+def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
+def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+
+// The immediate is optional for the DCPS instructions, defaulting to 0.
+def : InstAlias<"dcps1", (DCPS1i 0)>;
+def : InstAlias<"dcps2", (DCPS2i 0)>;
+def : InstAlias<"dcps3", (DCPS3i 0)>;
+
+//===----------------------------------------------------------------------===//
+// Extract (immediate)
+//===----------------------------------------------------------------------===//
+// Contains: EXTR + alias ROR
+
+def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
+ (outs GPR32:$Rd),
+ (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set GPR32:$Rd,
+ (A64Extr GPR32:$Rn, GPR32:$Rm, imm:$LSB))],
+ NoItinerary>;
+def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
+ (outs GPR64:$Rd),
+ (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
+ "extr\t$Rd, $Rn, $Rm, $LSB",
+ [(set GPR64:$Rd,
+ (A64Extr GPR64:$Rn, GPR64:$Rm, imm:$LSB))],
+ NoItinerary>;
+
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
+def : InstAlias<"ror $Rd, $Rs, $LSB",
+ (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
+
+def : Pat<(rotr GPR32:$Rn, bitfield32_imm:$LSB),
+ (EXTRwwwi GPR32:$Rn, GPR32:$Rn, bitfield32_imm:$LSB)>;
+def : Pat<(rotr GPR64:$Rn, bitfield64_imm:$LSB),
+ (EXTRxxxi GPR64:$Rn, GPR64:$Rn, bitfield64_imm:$LSB)>;
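+
+// For example, "ror w0, w1, #3" is accepted and encoded as
+// "extr w0, w1, w1, #3": extracting 32 bits starting at bit 3 of the
+// concatenation w1:w1 is exactly a rotate right by 3.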
+
+//===----------------------------------------------------------------------===//
+// Floating-point compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCMP, FCMPE
+
+def fpzero_asmoperand : AsmOperandClass {
+ let Name = "FPZero";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPZero";
+}
+
+def fpz32 : Operand<f32>,
+ ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+}
+
+def fpz64 : Operand<f64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+}
+
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
+ dag pattern> {
+ def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
+ (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
+ [pattern], NoItinerary> {
+ let Defs = [NZCV];
+ }
+
+ def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
+ (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+ }
+}
+
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
+
+// The field that would be Rm should be written as 0, but any value is valid
+// for disassembly, so we can't hard-code those bits here; fixFCMPImm sets
+// them after encoding.
+let PostEncoderMethod = "fixFCMPImm" in {
+ defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
+ (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+
+ defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
+ (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
+}
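+
+// The only immediate these forms accept is zero, e.g. "fcmp s0, #0.0" and
+// "fcmpe d1, #0.0"; the fpz32/fpz64 operands above only match the
+// floating-point constant 0.0.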
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional compare instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCCMP, FCCMPE
+
+class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
+ : A64I_fpccmp<0b0, 0b0, type, op,
+ (outs),
+ (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
+ !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
+ [], NoItinerary> {
+ let Defs = [NZCV];
+}
+
+def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
+def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
+def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
+def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+
+//===----------------------------------------------------------------------===//
+// Floating-point conditional select instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCSEL
+
+let Uses = [NZCV] in {
+ def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set FPR32:$Rd,
+ (simple_select (f32 FPR32:$Rn),
+ FPR32:$Rm))],
+ NoItinerary>;
+
+
+ def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
+ "fcsel\t$Rd, $Rn, $Rm, $Cond",
+ [(set FPR64:$Rd,
+ (simple_select (f64 FPR64:$Rn),
+ FPR64:$Rm))],
+ NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (1 source)
+//===----------------------------------------------------------------------===//
+// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
+
+def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
+ [{ (void)N; return false; }]>;
+
+// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
+// syntax. Default to no pattern because most are odd enough not to have one.
+multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
+ SDPatternOperator opnode = FPNoUnop> {
+ def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn))],
+ NoItinerary>;
+
+ def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
+ !strconcat(asmstr, "\t$Rd, $Rn"),
+ [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn))],
+ NoItinerary>;
+}
+
+defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
+defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
+defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
+defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
+
+defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
+defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
+defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
+defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
+defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
+defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
+defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
+
+// The FCVT instructions have different source and destination register-types,
+// but the fields are uniform everywhere a D-register (say) crops up. Package
+// this information in a Record.
+class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
+ RegisterClass Class = rc;
+ ValueType VT = vt;
+ bit t1 = fld{1};
+ bit t0 = fld{0};
+}
+
+def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
+def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
+def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
+
+class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
+ : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
+ {0,0,0,1, DestReg.t1, DestReg.t0},
+ (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
+ "fcvt\t$Rd, $Rn",
+ [(set (DestReg.VT DestReg.Class:$Rd),
+ (opnode (SrcReg.VT SrcReg.Class:$Rn)))], NoItinerary>;
+
+def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
+def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
+def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
+def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
+def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
+def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (2 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
+
+def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>;
+
+multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
+ SDPatternOperator opnode> {
+ def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
+ (outs FPR32:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set (f32 FPR32:$Rd), (opnode FPR32:$Rn, FPR32:$Rm))],
+ NoItinerary>;
+
+ def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Rn, FPR64:$Rm),
+ !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
+ [(set (f64 FPR64:$Rd), (opnode FPR64:$Rn, FPR64:$Rm))],
+ NoItinerary>;
+}
+
+let isCommutable = 1 in {
+ defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
+ defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
+
+ // No patterns for these.
+ defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
+ defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
+ defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
+ defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
+
+ defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (fneg (fmul node:$lhs, node:$rhs))> >;
+}
+
+defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
+defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point data-processing (3 sources) instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMADD, FMSUB, FNMADD, FNMSUB
+
+def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
+def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
+def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
+ (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
+
+class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
+ bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
+ : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
+ (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
+ !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
+ [(set FPR:$Rd, (fmakind (VT FPR:$Rn), FPR:$Rm, FPR:$Ra))],
+ NoItinerary>;
+
+def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
+def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
+def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
+def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
+
+def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
+def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
+def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
+def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> fixed-point conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+// #1-#32 allowed, encoded as "64 - <specified imm>".
+def fixedpos_asmoperand_i32 : AsmOperandClass {
+ let Name = "CVTFixedPos32";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<32>";
+ let DiagnosticType = "CVTFixedPos32";
+}
+
+// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
+def fixedpos_asmoperand_i64 : AsmOperandClass {
+ let Name = "CVTFixedPos64";
+ let RenderMethod = "addCVTFixedPosOperands";
+ let PredicateMethod = "isCVTFixedPos<64>";
+ let DiagnosticType = "CVTFixedPos64";
+}
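+
+// In both cases the scale field of the instruction holds "64 - <fbits>": for
+// example "fcvtzs w0, s1, #16" is encoded with a scale field of 48, and a
+// fractional width of #1 is encoded as 63.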
+
+// We need the Cartesian product of the f32/f64 and i32/i64 operands for
+// conversions:
+// + Selection needs to use operands of correct floating type
+// + Assembly parsing and decoding depend on integer width
+class cvtfix_i32_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i32;
+ let DecoderMethod = "DecodeCVT32FixedPosOperand";
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+class cvtfix_i64_op<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
+ let ParserMatchClass = fixedpos_asmoperand_i64;
+ let PrintMethod = "printCVTFixedPosOperand";
+}
+
+// Because of the proliferation of weird operands, it's not really
+// worth going for a multiclass here. Oh well.
+
+class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass GPR, RegisterClass FPR, Operand scale_op,
+ string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
+ (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set GPR:$Rd, (cvtop (fmul FPR:$Rn, scale_op:$Scale)))],
+ NoItinerary>;
+
+def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32,
+ cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32,
+ cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
+def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32,
+ cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
+def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32,
+ cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
+
+def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64,
+ cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64,
+ cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
+def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64,
+ cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
+def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64,
+ cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
+
+
+class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
+ RegisterClass FPR, RegisterClass GPR, Operand scale_op,
+ string asmop, SDNode cvtop>
+ : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
+ (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
+ !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
+ [(set FPR:$Rd, (fdiv (cvtop GPR:$Rn), scale_op:$Scale))],
+ NoItinerary>;
+
+def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32,
+ cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
+def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64,
+ cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
+def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32,
+ cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
+def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64,
+ cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
+def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32,
+ cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
+def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64,
+ cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
+def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32,
+ cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
+def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64,
+ cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
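+
+// Semantically "scvtf s0, w1, #8" computes (float)w1 / 2^8, which is why the
+// pattern above is an fdiv of the converted integer by the scale operand; the
+// SelectCVTFixedPosOperand ComplexPattern is expected to match the
+// corresponding power-of-two floating-point constant (256.0 here).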
+
+//===----------------------------------------------------------------------===//
+// Floating-point <-> integer conversion instructions
+//===----------------------------------------------------------------------===//
+// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
+
+class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass DestPR, RegisterClass SrcPR, string asmop>
+ : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>;
+
+multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
+ def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
+ GPR32, FPR32, asmop # "s">;
+ def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
+ GPR64, FPR32, asmop # "s">;
+ def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
+ GPR32, FPR32, asmop # "u">;
+ def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
+ GPR64, FPR32, asmop # "u">;
+
+ def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
+ GPR32, FPR64, asmop # "s">;
+ def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
+ GPR64, FPR64, asmop # "s">;
+ def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
+ GPR32, FPR64, asmop # "u">;
+ def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
+ GPR64, FPR64, asmop # "u">;
+}
+
+defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
+defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
+defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
+defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
+defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+
+def : Pat<(i32 (fp_to_sint FPR32:$Rn)), (FCVTZSws FPR32:$Rn)>;
+def : Pat<(i64 (fp_to_sint FPR32:$Rn)), (FCVTZSxs FPR32:$Rn)>;
+def : Pat<(i32 (fp_to_uint FPR32:$Rn)), (FCVTZUws FPR32:$Rn)>;
+def : Pat<(i64 (fp_to_uint FPR32:$Rn)), (FCVTZUxs FPR32:$Rn)>;
+def : Pat<(i32 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSwd FPR64:$Rn)>;
+def : Pat<(i64 (fp_to_sint (f64 FPR64:$Rn))), (FCVTZSxd FPR64:$Rn)>;
+def : Pat<(i32 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUwd FPR64:$Rn)>;
+def : Pat<(i64 (fp_to_uint (f64 FPR64:$Rn))), (FCVTZUxd FPR64:$Rn)>;
+
+multiclass A64I_inttofp<bit o0, string asmop> {
+ def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
+ def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
+ def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
+ def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
+}
+
+defm S : A64I_inttofp<0b0, "scvtf">;
+defm U : A64I_inttofp<0b1, "ucvtf">;
+
+def : Pat<(f32 (sint_to_fp GPR32:$Rn)), (SCVTFsw GPR32:$Rn)>;
+def : Pat<(f32 (sint_to_fp GPR64:$Rn)), (SCVTFsx GPR64:$Rn)>;
+def : Pat<(f64 (sint_to_fp GPR32:$Rn)), (SCVTFdw GPR32:$Rn)>;
+def : Pat<(f64 (sint_to_fp GPR64:$Rn)), (SCVTFdx GPR64:$Rn)>;
+def : Pat<(f32 (uint_to_fp GPR32:$Rn)), (UCVTFsw GPR32:$Rn)>;
+def : Pat<(f32 (uint_to_fp GPR64:$Rn)), (UCVTFsx GPR64:$Rn)>;
+def : Pat<(f64 (uint_to_fp GPR32:$Rn)), (UCVTFdw GPR32:$Rn)>;
+def : Pat<(f64 (uint_to_fp GPR64:$Rn)), (UCVTFdx GPR64:$Rn)>;
+
+def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
+def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
+def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
+def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+
+def : Pat<(i32 (bitconvert (f32 FPR32:$Rn))), (FMOVws FPR32:$Rn)>;
+def : Pat<(f32 (bitconvert (i32 GPR32:$Rn))), (FMOVsw GPR32:$Rn)>;
+def : Pat<(i64 (bitconvert (f64 FPR64:$Rn))), (FMOVxd FPR64:$Rn)>;
+def : Pat<(f64 (bitconvert (i64 GPR64:$Rn))), (FMOVdx GPR64:$Rn)>;
+
+def lane1_asmoperand : AsmOperandClass {
+ let Name = "Lane1";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "Lane1";
+}
+
+def lane1 : Operand<i32> {
+ let ParserMatchClass = lane1_asmoperand;
+ let PrintMethod = "printBareImmOperand";
+}
+
+let DecoderMethod = "DecodeFMOVLaneInstruction" in {
+ def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
+ (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
+ "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>;
+
+ def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
+ (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
+ "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
+}
+
+def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
+ (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
+
+def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
+ (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediate instructions
+//===----------------------------------------------------------------------===//
+// Contains: FMOV
+
+def fpimm_asmoperand : AsmOperandClass {
+ let Name = "FMOVImm";
+ let ParserMethod = "ParseFPImmOperand";
+ let DiagnosticType = "FPImm";
+}
+
+// The MCOperand for these instructions is the encoded 8-bit value.
+def SDXF_fpimm : SDNodeXForm<fpimm, [{
+ uint32_t Imm8;
+ A64Imms::isFPImm(N->getValueAPF(), Imm8);
+ return CurDAG->getTargetConstant(Imm8, MVT::i32);
+}]>;
+
+class fmov_operand<ValueType FT>
+ : Operand<i32>,
+ PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
+ SDXF_fpimm> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = fpimm_asmoperand;
+}
+
+def fmov32_operand : fmov_operand<f32>;
+def fmov64_operand : fmov_operand<f64>;
+
+class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
+ Operand fmov_operand>
+ : A64I_fpimm<0b0, 0b0, type, 0b00000,
+ (outs Reg:$Rd),
+ (ins fmov_operand:$Imm8),
+ "fmov\t$Rd, $Imm8",
+ [(set (VT Reg:$Rd), fmov_operand:$Imm8)],
+ NoItinerary>;
+
+def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
+def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
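+
+// Only values representable in the 8-bit AArch64 FP immediate form, namely
+// +/-(16..31)/16 * 2^n with n in the range -3..4, are accepted: for example
+// "fmov s0, #1.0", "fmov d0, #0.5" and "fmov d0, #-2.5" are valid, while
+// #0.0 is not (A64Imms::isFPImm rejects it, so zero has to be materialised
+// some other way).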
+
+//===----------------------------------------------------------------------===//
+// Load-register (literal) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDR, LDRSW, PRFM
+
+def ldrlit_label_asmoperand : AsmOperandClass {
+ let Name = "LoadLitLabel";
+ let RenderMethod = "addLabelOperands<19, 4>";
+ let DiagnosticType = "Label";
+}
+
+def ldrlit_label : Operand<i64> {
+ let EncoderMethod = "getLoadLitLabelOpValue";
+
+ // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<19, 4>";
+ let ParserMatchClass = ldrlit_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+// Various instructions take an immediate value (any raw number is always
+// accepted), but some values also have a symbolic name to make things easier.
+// These operands and the associated functions abstract away the differences.
+multiclass namedimm<string prefix, string mapper> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "NamedImm" # prefix;
+ let PredicateMethod = "isUImm";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
+ let DiagnosticType = "NamedImm_" # prefix;
+ }
+
+ def _op : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printNamedImmOperand<" # mapper # ">";
+ let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
+ }
+}
+
+defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
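+
+// For example, with A64PRFM::PRFMMapper the assembler accepts both
+// "prfm pldl1keep, lbl" and the raw form "prfm #0, lbl"; PLDL1KEEP is simply
+// the symbolic name for prefetch operation value 0.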
+
+class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
+ list<dag> patterns = []>
+ : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
+ "ldr\t$Rt, $Imm19", patterns, NoItinerary>;
+
+let mayLoad = 1 in {
+ def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
+ def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
+}
+
+def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
+def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+
+let mayLoad = 1 in {
+ def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+
+
+ def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins ldrlit_label:$Imm19),
+ "ldrsw\t$Rt, $Imm19",
+ [], NoItinerary>;
+
+ def PRFM_lit : A64I_LDRlit<0b11, 0b0,
+ (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
+ "prfm\t$Rt, $Imm19",
+ [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load-store exclusive instructions
+//===----------------------------------------------------------------------===//
+// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR, STXP, LDXP, STLXRB,
+// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
+// STLRH, STLR, LDARB, LDARH, LDAR
+
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
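+//
+// For example, LDXR below uses neither Rs nor Rt2 and so is tagged with
+// fixLoadStoreExclusive<0,0> (both register fields are forced to all-ones),
+// whereas STXR does use Rs and is tagged with fixLoadStoreExclusive<1,0>.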
+
+// This operand parses a GPR64xsp register, followed by an optional immediate
+// #0.
+def GPR64xsp0_asmoperand : AsmOperandClass {
+ let Name = "GPR64xsp0";
+ let PredicateMethod = "isWrappedReg";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "ParseLSXAddressOperand";
+ // Diagnostics are provided by ParserMethod
+}
+
+def GPR64xsp0 : RegisterOperand<GPR64xsp> {
+ let ParserMatchClass = GPR64xsp0_asmoperand;
+}
+
+//===----------------------------------
+// Store-exclusive (releasing & normal)
+//===----------------------------------
+
+class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [],NoItinerary>;
+
+ def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
+ (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
+defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
+
+//===----------------------------------
+// Loads
+//===----------------------------------
+
+class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayLoad = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+multiclass A64I_LRex<string asmstr, bits<3> opcode> {
+ def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXR : A64I_LRex<"ldxr", 0b000>;
+defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
+defm LDAR : A64I_LRex<"ldar", 0b101>;
+
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
+
+def : Pat<(atomic_load_acquire_8 GPR64xsp:$Rn), (LDAR_byte GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_16 GPR64xsp:$Rn), (LDAR_hword GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_32 GPR64xsp:$Rn), (LDAR_word GPR64xsp0:$Rn)>;
+def : Pat<(atomic_load_acquire_64 GPR64xsp:$Rn), (LDAR_dword GPR64xsp0:$Rn)>;
+
+//===----------------------------------
+// Store-release (no exclusivity)
+//===----------------------------------
+
+class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tn <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() == Release;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+def atomic_store_release_64 : releasing_store<atomic_store_64>;
+
+multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
+ def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_8 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_16 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
+ (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_32 GPR64xsp0:$Rn, GPR32:$Rt)],
+ NoItinerary>;
+
+ def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
+ (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
+ [(atomic_store_release_64 GPR64xsp0:$Rn, GPR64:$Rt)],
+ NoItinerary>;
+}
+
+defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
+
+//===----------------------------------
+// Store-exclusive pair (releasing & normal)
+//===----------------------------------
+
+class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_stt2n <size,
+ opcode{2}, 0, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
+ pat, itin> {
+ let mayStore = 1;
+}
+
+
+multiclass A64I_SPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
+ (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
+ GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm STXP : A64I_SPex<"stxp", 0b010>;
+defm STLXP : A64I_SPex<"stlxp", 0b011>;
+
+//===----------------------------------
+// Load-exclusive pair (acquiring & normal)
+//===----------------------------------
+
+class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
+ dag ins, list<dag> pat,
+ InstrItinClass itin> :
+ A64I_LDSTex_tt2n <size,
+ opcode{2}, 1, opcode{1}, opcode{0},
+ outs, ins,
+ !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
+ pat, itin>{
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
+}
+
+multiclass A64I_LPex<string asmstr, bits<3> opcode> {
+ def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
+ (outs GPR32:$Rt, GPR32:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+
+ def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp0:$Rn),
+ [], NoItinerary>;
+}
+
+defm LDXP : A64I_LPex<"ldxp", 0b010>;
+defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unscaled immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDUR, LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH
+//           and PRFUM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (register offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (unsigned immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register (immediate pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
+
+// Note that patterns are much later on in a completely separate section (they
+// need ADRPxi to be defined).
+
+//===-------------------------------
+// 1. Various operands needed
+//===-------------------------------
+
+//===-------------------------------
+// 1.1 Unsigned 12-bit immediate operands
+//===-------------------------------
+// The addressing mode for these instructions consists of an unsigned 12-bit
+// immediate which is scaled by the size of the memory access.
+//
+// We represent this in the MC layer by two operands:
+// 1. A base register.
+// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
+// would have '1' in this field.
+// This means that separate functions are needed for converting representations
+// which *are* aware of the intended access size.
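+//
+// As a worked example, for an 8-byte access the assembly-level offset must be
+// a multiple of 8 in the range 0 to 4095*8 = 32760, and "ldr x0, [x1, #16]"
+// is stored with '2' in the MC-level immediate operand.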
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_uimm12<int MemSize, string prefix> {
+ def uimm12_asmoperand : AsmOperandClass {
+ let Name = "OffsetUImm12_" # MemSize;
+ let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
+ let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreUImm12_" # MemSize;
+ }
+
+ // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
+ // complicates things beyond TableGen's ken.
+ def uimm12 : Operand<i64>,
+ ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+
+ let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
+ let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
+ }
+}
+
+defm byte_ : offsets_uimm12<1, "byte_">;
+defm hword_ : offsets_uimm12<2, "hword_">;
+defm word_ : offsets_uimm12<4, "word_">;
+defm dword_ : offsets_uimm12<8, "dword_">;
+defm qword_ : offsets_uimm12<16, "qword_">;
+
+//===-------------------------------
+// 1.2 Signed 9-bit immediate operands
+//===-------------------------------
+
+// The MCInst is expected to store the bit-wise encoding of the value,
+// which amounts to lopping off the extended sign bits.
+def SDXF_simm9 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
+}]>;
+
+def simm9_asmoperand : AsmOperandClass {
+ let Name = "SImm9";
+ let PredicateMethod = "isSImm<9>";
+ let RenderMethod = "addSImmOperands<9>";
+ let DiagnosticType = "LoadStoreSImm9";
+}
+
+def simm9 : Operand<i64>,
+ ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
+ SDXF_simm9> {
+ let PrintMethod = "printOffsetSImm9Operand";
+ let ParserMatchClass = simm9_asmoperand;
+}
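+
+// The unscaled byte offset is therefore limited to -256..+255 regardless of
+// access size, e.g. "ldur x0, [x1, #-256]" is valid; SDXF_simm9 stores the
+// 9-bit two's-complement encoding, so #-1 becomes 0x1ff.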
+
+
+//===-------------------------------
+// 1.3 Register offset extensions
+//===-------------------------------
+
+// The assembly-syntax for these addressing-modes is:
+// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
+//
+// The essential semantics are:
+// + <amount> is a shift: #<log(transfer size)> or #0
+// + <R> can be W or X.
+// + If <R> is W, <extend> can be UXTW or SXTW
+// + If <R> is X, <extend> can be LSL or SXTX
+//
+// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
+// which will need separate instructions for LLVM type-consistency. We'll also
+// need separate operands, of course.
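+//
+// For example, with a 4-byte access <amount> may be #2 (log2 of the transfer
+// size) or #0, so "ldr w0, [x1, w2, sxtw #2]" uses a W-register index while
+// "ldr w0, [x1, x2, lsl #2]" uses an X-register index.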
+multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
+ string Rm, string prefix> {
+ def regext_asmoperand : AsmOperandClass {
+ let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
+ let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
+ let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
+ }
+
+ def regext : Operand<i64> {
+ let PrintMethod
+ = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+
+ let DecoderMethod = "DecodeAddrRegExtendOperand";
+ let ParserMatchClass
+ = !cast<AsmOperandClass>(prefix # regext_asmoperand);
+ }
+}
+
+multiclass regexts_wx<int MemSize, string prefix> {
+ // Rm is an X-register if LSL or SXTX are specified as the shift.
+ defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+
+ // Rm is a W-register if UXTW or SXTW are specified as the shift.
+ defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
+}
+
+defm byte_ : regexts_wx<1, "byte_">;
+defm hword_ : regexts_wx<2, "hword_">;
+defm word_ : regexts_wx<4, "word_">;
+defm dword_ : regexts_wx<8, "dword_">;
+defm qword_ : regexts_wx<16, "qword_">;
+
+
+//===------------------------------
+// 2. The instructions themselves.
+//===------------------------------
+
+// We have the following instructions to implement:
+// | | B | H | W | X |
+// |-----------------+-------+-------+-------+--------|
+// | unsigned str | STRB | STRH | STR | STR |
+// | unsigned ldr | LDRB | LDRH | LDR | LDR |
+// | signed ldr to W | LDRSB | LDRSH | - | - |
+// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+
+// This will instantiate the LDR/STR instructions you'd expect to use for an
+// unsigned datatype (first two rows above) or floating-point register, which is
+// reasonably uniform across all access sizes.
+
+
+//===------------------------------
+// 2.1 Regular instructions
+//===------------------------------
+
+// This covers the basic unsigned (or sign-irrelevant) loads and stores to
+// general-purpose and floating-point registers.
+
+class AddrParams<string prefix> {
+ Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
+
+ Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
+ Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
+}
+
+def byte_addrparams : AddrParams<"byte">;
+def hword_addrparams : AddrParams<"hword">;
+def word_addrparams : AddrParams<"word">;
+def dword_addrparams : AddrParams<"dword">;
+def qword_addrparams : AddrParams<"qword">;
+
+multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
+ bit high_opc, string asmsuffix,
+ RegisterClass GPR, AddrParams params> {
+ // Unsigned immediate
+ def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset (four of these: load/store and Wm/Xm).
+ let mayLoad = 1 in {
+ def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
+ (outs GPR:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ let mayStore = 1 in {
+ def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
+ params.regextWm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
+ params.regextXm:$Ext),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ // Unaligned immediate
+ def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+ def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Post-indexed
+ def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
+ (outs GPR64xsp:$Rn_wb),
+ (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let mayStore = 1;
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
+ (outs GPR:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+}
+
+// STRB/LDRB: First define the instructions
+defm LS8
+ : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
+
+// STRH/LDRH
+defm LS16
+ : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
+
+
+// STR/LDR to/from a W register
+defm LS32
+ : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
+
+// STR/LDR to/from an X register
+defm LS64
+ : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+
+// STR/LDR to/from a B register
+defm LSFP8
+ : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
+
+// STR/LDR to/from an H register
+defm LSFP16
+ : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
+
+// STR/LDR to/from an S register
+defm LSFP32
+ : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
+// STR/LDR to/from a D register
+defm LSFP64
+ : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
+// STR/LDR to/from a Q register
+defm LSFP128
+ : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
+ qword_addrparams>;
+
+//===------------------------------
+// 2.3 Signed loads
+//===------------------------------
+
+// Byte and half-word signed loads can both go into either an X or a W register,
+// so it's worth factoring out. Signed word loads don't fit because there is no
+// W version.
+multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
+ string prefix> {
+ // Unsigned offset
+ def w : A64I_LSunsigimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def x : A64I_LSunsigimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+ // Register offset
+ let mayLoad = 1 in {
+ def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ }
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+ def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+ let mayLoad = 1 in {
+ // Unaligned offset
+ def w_U : A64I_LSunalimm<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x_U : A64I_LSunalimm<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+
+ // Post-indexed
+ def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ // Pre-indexed
+ def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
+ (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+
+ def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+ }
+ } // let mayLoad = 1
+}
+
+// LDRSB
+defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
+// LDRSH
+defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
+
+// LDRSW: load a 32-bit register, sign-extending to 64-bits.
+def LDRSWx
+ : A64I_LSunsigimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
+ "ldrsw\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+
+ def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
+ "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
+ (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
+
+
+def LDURSWx
+ : A64I_LSunalimm<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldursw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+def LDRSWx_PostInd
+ : A64I_LSpostind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn], $SImm9",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldrsw\t$Rt, [$Rn, $SImm9]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeSingleIndexedInstruction";
+}
+
+//===------------------------------
+// 2.4 Prefetch operations
+//===------------------------------
+
+def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
+ "prfm\t$Rt, [$Rn, $UImm12]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfm $Rt, [$Rn]",
+ (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+let mayLoad = 1 in {
+ def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR32:$Rm, dword_Wm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+ def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, dword_Xm_regext:$Ext),
+ "prfm\t$Rt, [$Rn, $Rm, $Ext]",
+ [], NoItinerary>;
+}
+
+def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
+ GPR64:$Rm, 2)>;
+
+
+def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
+ (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "prfum\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"prfum $Rt, [$Rn]",
+ (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register (unprivileged) instructions
+//===----------------------------------------------------------------------===//
+// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
+
+// These instructions very much mirror the "unscaled immediate" loads, but since
+// there are no floating-point variants we need to split them out into their own
+// section to avoid instantiation of "ldtr d0, [sp]" etc.
+
+multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
+ string prefix> {
+ def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
+ (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
+ "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayStore = 1;
+ }
+
+ def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
+ (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ }
+
+ def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// STTRB/LDTRB: First define the instructions
+defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
+
+// STTRH/LDTRH
+defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
+
+// STTR/LDTR to/from a W register
+defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
+
+// STTR/LDTR to/from an X register
+defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
+
+// Now a class for the signed instructions that can go to either 32 or 64
+// bits...
+multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
+ let mayLoad = 1 in {
+ def w : A64I_LSunpriv<size, 0b0, 0b11,
+ (outs GPR32:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+
+ def x : A64I_LSunpriv<size, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary>;
+ }
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
+
+ def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
+ (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+}
+
+// LDTRSB
+defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
+// LDTRSH
+defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
+
+// And finally LDTRSW which only goes to 64 bits.
+def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
+ (outs GPR64:$Rt),
+ (ins GPR64xsp:$Rn, simm9:$SImm9),
+ "ldtrsw\t$Rt, [$Rn, $SImm9]",
+ [], NoItinerary> {
+ let mayLoad = 1;
+}
+def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Load-store register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (post-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store register pair (pre-indexed) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STP, LDP, LDPSW
+//
+// and
+//
+//===----------------------------------------------------------------------===//
+// Load-store non-temporal register pair (offset) instructions
+//===----------------------------------------------------------------------===//
+// Contains: STNP, LDNP
+
+
+// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
+// know the access size via some means. An isolated operand does not have this
+// information unless told from here, which means we need separate tablegen
+// Operands for each access size. This multiclass takes care of instantiating
+// the correct template functions in the rest of the backend.
+
+multiclass offsets_simm7<string MemSize, string prefix> {
+ // The bare signed 7-bit immediate is used in post-indexed instructions, but
+ // because of the scaling performed a generic "simm7" operand isn't
+ // appropriate here either.
+ def simm7_asmoperand : AsmOperandClass {
+ let Name = "SImm7_Scaled" # MemSize;
+ let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
+ let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
+ let DiagnosticType = "LoadStoreSImm7_" # MemSize;
+ }
+
+ def simm7 : Operand<i64> {
+ let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
+ }
+}
+
+defm word_ : offsets_simm7<"4", "word_">;
+defm dword_ : offsets_simm7<"8", "dword_">;
+defm qword_ : offsets_simm7<"16", "qword_">;
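+
+// As an illustration of the scaling (not part of the original patch):
+//     ldp w0, w1, [sp, #16]    ; word_simm7,  encodes SImm7 = 16/4  = 4
+//     ldp x0, x1, [sp, #16]    ; dword_simm7, encodes SImm7 = 16/8  = 2
+//     ldp q0, q1, [sp, #32]    ; qword_simm7, encodes SImm7 = 32/16 = 2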
+
+multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
+ Operand simm7, string prefix> {
+ def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _LDR : A64I_LSPoffset<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
+ (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2,
+ GPR64xsp:$Rn,
+ simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+
+ // Decoder only needed for unpredictability checking (FIXME).
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayStore = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+
+ def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
+ (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+ "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayStore = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+ def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
+ (outs SomeReg:$Rt, SomeReg:$Rt2),
+ (ins GPR64xsp:$Rn, simm7:$SImm7),
+ "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+ }
+ def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
+ SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
+
+}
+
+
+defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
+defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
+defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
+defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
+ "LSFPPair128">;
+
+
+def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+ let mayLoad = 1;
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
+ (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
+
+def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
+ (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
+ (ins GPR64xsp:$Rn, word_simm7:$SImm7),
+ "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
+ [], NoItinerary> {
+ let mayLoad = 1;
+ let Constraints = "$Rn = $Rn_wb";
+ let DecoderMethod = "DecodeLDSTPairInstruction";
+}
+
+//===----------------------------------------------------------------------===//
+// Logical (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
+
+multiclass logical_imm_operands<string prefix, string note,
+ int size, ValueType VT> {
+ def _asmoperand : AsmOperandClass {
+ let Name = "LogicalImm" # note # size;
+ let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
+ let RenderMethod = "addLogicalImmOperands<" # size # ">";
+ let DiagnosticType = "LogicalSecondSource";
+ }
+
+ def _operand
+ : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+ let PrintMethod = "printLogicalImmOperand<" # size # ">";
+ let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
+ }
+}
+
+defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
+defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
+
+// The mov versions only differ in assembly parsing, where they
+// exclude values representable with either MOVZ or MOVN.
+defm logical_imm32_mov
+ : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
+defm logical_imm64_mov
+ : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
+
+
+multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
+ def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set GPR32wsp:$Rd,
+ (opnode GPR32:$Rn, logical_imm32_operand:$Imm))],
+ NoItinerary>;
+
+ def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [(set GPR64xsp:$Rd,
+ (opnode GPR64:$Rn, logical_imm64_operand:$Imm))],
+ NoItinerary>;
+}
+
+defm AND : A64I_logimmSizes<0b00, "and", and>;
+defm ORR : A64I_logimmSizes<0b01, "orr", or>;
+defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
+
+let Defs = [NZCV] in {
+ def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
+ (ins GPR32:$Rn, logical_imm32_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+
+ def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
+ (ins GPR64:$Rn, logical_imm64_operand:$Imm),
+ "ands\t$Rd, $Rn, $Imm",
+ [], NoItinerary>;
+}
+
+
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
+def : InstAlias<"tst $Rn, $Imm",
+ (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
+def : InstAlias<"mov $Rd, $Imm",
+ (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
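+
+// Illustrative expansions of the aliases above (not part of the original
+// patch):
+//     tst w0, #0xff          ; == ands wzr, w0, #0xff
+//     mov w0, #0xf0f0f0f0    ; == orr w0, wzr, #0xf0f0f0f0, a bitmask
+//                            ;    immediate not representable by MOVZ or MOVN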
+
+//===----------------------------------------------------------------------===//
+// Logical (shifted register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
+
+// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory the
+// flags set by "ANDS" differ from those of a real compare for unsigned
+// conditions, so we defensively only allow signed (or sign-irrelevant)
+// condition codes here. In practice "unsigned greater than 0" is canonicalized
+// to "not equal to 0", and LLVM gives us that form.
+def signed_cond : PatLeaf<(cond), [{
+ return !isUnsignedIntSetCC(N->get());
+}]>;
+
+
+// These instructions share their "shift" operands with add/sub (shifted
+// register instructions). They are defined there.
+
+// N.b. the commutable parameter is just !N. It will be first against the wall
+// when the revolution comes.
+multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
+ bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag, string sty,
+ RegisterClass GPR, list<Register> defs> {
+ let isCommutable = commutable, Defs = defs in {
+ def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, opc, 0b10, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, opc, 0b11, N,
+ (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
+ [(set GPR:$Rd, (opfrag GPR:$Rn, (rotr GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6))
+ )],
+ NoItinerary>;
+ }
+
+ def _noshift
+ : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
+ GPR:$Rm, 0)>;
+
+ def : Pat<(opfrag GPR:$Rn, GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
+ string asmop, SDPatternOperator opfrag,
+ list<Register> defs> {
+ defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
+ commutable, asmop, opfrag, "i64", GPR64, defs>;
+ defm www : logical_shifts<prefix # "www", 0b0, opc, N,
+ commutable, asmop, opfrag, "i32", GPR32, defs>;
+}
+
+
+defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
+defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
+defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
+defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
+ PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs))>, []>;
+defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (or node:$lhs, (not node:$rhs))>, []>;
+defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (xor node:$lhs, (not node:$rhs))>, []>;
+defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (and node:$lhs, (not node:$rhs)),
+ [{ (void)N; return false; }]>,
+ [NZCV]>;
+
+multiclass tst_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
+ let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
+ def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
+ (outs),
+ (ins GPR:$Rn, GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ "tst\t$Rn, $Rm, $Imm6",
+ [(set NZCV, (A64setcc (and GPR:$Rn, (rotr GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6)),
+ 0, signed_cond))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"tst $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(A64setcc (and GPR:$Rn, GPR:$Rm), 0, signed_cond),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+}
+
+defm TSTxx : tst_shifts<"TSTxx", 0b1, "i64", GPR64>;
+defm TSTww : tst_shifts<"TSTww", 0b0, "i32", GPR32>;
+
+
+multiclass mvn_shifts<string prefix, bit sf, string sty, RegisterClass GPR> {
+ let isCommutable = 0, Rn = 0b11111 in {
+ def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (shl GPR:$Rm,
+ !cast<Operand>("lsl_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+
+ def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (srl GPR:$Rm,
+ !cast<Operand>("lsr_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+ def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (sra GPR:$Rm,
+ !cast<Operand>("asr_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+
+ def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
+ (outs GPR:$Rd),
+ (ins GPR:$Rm,
+ !cast<Operand>("ror_operand_" # sty):$Imm6),
+ "mvn\t$Rd, $Rm, $Imm6",
+ [(set GPR:$Rd, (not (rotr GPR:$Rm,
+                            !cast<Operand>("ror_operand_" # sty):$Imm6)))],
+ NoItinerary>;
+ }
+
+ def _noshift : InstAlias<"mvn $Rn, $Rm",
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+
+ def : Pat<(not GPR:$Rm),
+ (!cast<Instruction>(prefix # "_lsl") GPR:$Rm, 0)>;
+}
+
+defm MVNxx : mvn_shifts<"MVNxx", 0b1, "i64", GPR64>;
+defm MVNww : mvn_shifts<"MVNww", 0b0, "i32", GPR32>;
+
+def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
+def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Move wide (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: MOVN, MOVZ, MOVK + MOV aliases
+
+// A wide variety of different relocations are needed for variants of these
+// instructions, so it turns out that we need a different operand for all of
+// them.
+multiclass movw_operands<string prefix, string instname, int width> {
+ def _imm_asmoperand : AsmOperandClass {
+ let Name = instname # width # "Shifted" # shift;
+ let PredicateMethod = "is" # instname # width # "Imm";
+ let RenderMethod = "addMoveWideImmOperands";
+ let ParserMethod = "ParseImmWithLSLOperand";
+ let DiagnosticType = "MOVWUImm16";
+ }
+
+ def _imm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
+ let PrintMethod = "printMoveWideImmOperand";
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movn32 : movw_operands<"movn32", "MOVN", 32>;
+defm movn64 : movw_operands<"movn64", "MOVN", 64>;
+defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
+defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
+defm movk32 : movw_operands<"movk32", "MOVK", 32>;
+defm movk64 : movw_operands<"movk64", "MOVK", 64>;
+
+multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
+ dag ins64bit> {
+
+ def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+
+ def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
+ !strconcat(asmop, "\t$Rd, $FullImm"),
+ [], NoItinerary> {
+ bits<18> FullImm;
+ let UImm16 = FullImm{15-0};
+ let Shift = FullImm{17-16};
+ }
+}
+
+let isMoveImm = 1, isReMaterializable = 1,
+ isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+ defm MOVN : A64I_movwSizes<0b00, "movn",
+ (ins movn32_imm:$FullImm),
+ (ins movn64_imm:$FullImm)>;
+
+ // Some relocations are able to convert between a MOVZ and a MOVN. If these
+ // are applied the instruction must be emitted with the corresponding bits as
+ // 0, which means a MOVZ needs to override that bit from the default.
+ let PostEncoderMethod = "fixMOVZ" in
+ defm MOVZ : A64I_movwSizes<0b10, "movz",
+ (ins movz32_imm:$FullImm),
+ (ins movz64_imm:$FullImm)>;
+}
+
+let Constraints = "$src = $Rd" in
+defm MOVK : A64I_movwSizes<0b11, "movk",
+ (ins GPR32:$src, movk32_imm:$FullImm),
+ (ins GPR64:$src, movk64_imm:$FullImm)>;
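+
+// As an illustration (not part of the original patch), a constant such as
+// 0x12345678 can be materialized with a MOVZ/MOVK pair:
+//     movz w0, #0x5678
+//     movk w0, #0x1234, lsl #16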
+
+
+// And now the "MOV" aliases. These also need their own operands because what
+// they accept is completely different to what the base instructions accept.
+multiclass movalias_operand<string prefix, string basename,
+ string immpredicate, int width> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "MovAlias";
+ let PredicateMethod
+ = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
+ let RenderMethod
+ = "addMoveWideMovAliasOperands<" # width # ", "
+ # "A64Imms::" # immpredicate # ">";
+ }
+
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
+
+ let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
+ }
+}
+
+defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
+defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
+defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
+defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
+
+// FIXME: these are officially canonical aliases, but TableGen is too limited to
+// print them at the moment. I believe in this case an "AliasPredicate" method
+// will need to be implemented to allow it, as well as the more generally
+// useful handling of non-register, non-constant operands.
+class movalias<Instruction INST, RegisterClass GPR, Operand operand>
+ : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
+
+def : movalias<MOVZwii, GPR32, movz32_movimm>;
+def : movalias<MOVZxii, GPR64, movz64_movimm>;
+def : movalias<MOVNwii, GPR32, movn32_movimm>;
+def : movalias<MOVNxii, GPR64, movn64_movimm>;
+
+//===----------------------------------------------------------------------===//
+// PC-relative addressing instructions
+//===----------------------------------------------------------------------===//
+// Contains: ADR, ADRP
+
+def adr_label : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
+
+ // This label is a 21-bit offset from PC, unscaled
+ let PrintMethod = "printLabelOperand<21, 1>";
+ let ParserMatchClass = label_asmoperand<21, 1>;
+ let OperandType = "OPERAND_PCREL";
+}
+
+def adrp_label_asmoperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let RenderMethod = "addLabelOperands<21, 4096>";
+ let DiagnosticType = "Label";
+}
+
+def adrp_label : Operand<i64> {
+ let EncoderMethod = "getAdrpLabelOpValue";
+
+ // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
+ let PrintMethod = "printLabelOperand<21, 4096>";
+ let ParserMatchClass = adrp_label_asmoperand;
+ let OperandType = "OPERAND_PCREL";
+}
+
+let hasSideEffects = 0 in {
+ def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
+ "adr\t$Rd, $Label", [], NoItinerary>;
+
+ def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
+ "adrp\t$Rd, $Label", [], NoItinerary>;
+}
+
+//===----------------------------------------------------------------------===//
+// System instructions
+//===----------------------------------------------------------------------===//
+// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
+// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
+
+// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
+def uimm3_asmoperand : AsmOperandClass {
+ let Name = "UImm3";
+ let PredicateMethod = "isUImm<3>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm3";
+}
+
+def uimm3 : Operand<i32> {
+ let ParserMatchClass = uimm3_asmoperand;
+}
+
+// The HINT alias can accept a simple unsigned 7-bit immediate.
+def uimm7_asmoperand : AsmOperandClass {
+ let Name = "UImm7";
+ let PredicateMethod = "isUImm<7>";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "UImm7";
+}
+
+def uimm7 : Operand<i32> {
+ let ParserMatchClass = uimm7_asmoperand;
+}
+
+// Multiclass namedimm is defined with the prefetch operands. Most of these fit
+// into the NamedImmMapper scheme well: they either accept a named operand or
+// any immediate under a particular value (which may be 0, implying no immediate
+// is allowed).
+defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
+defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
+defm ic : namedimm<"ic", "A64IC::ICMapper">;
+defm dc : namedimm<"dc", "A64DC::DCMapper">;
+defm at : namedimm<"at", "A64AT::ATMapper">;
+defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
+
+// However, MRS and MSR are more complicated for a few reasons:
+// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
+// implementation-defined effect
+// * Most registers are shared, but some are read-only or write-only.
+// * There is a variant of MSR which accepts the same register name (SPSel),
+// but which would have a different encoding.
+
+// In principle these could be resolved with more complicated subclasses of
+// NamedImmMapper; however, that imposes an overhead on the other "named
+// immediates", both in concrete terms (virtual tables) and in unnecessary
+// abstraction.
+
+// The solution adopted here is to take the MRS/MSR Mappers out of the usual
+// hierarchy (they're not derived from NamedImmMapper) and to add logic for
+// their special situation.
+def mrs_asmoperand : AsmOperandClass {
+ let Name = "MRS";
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MRS";
+}
+
+def mrs_op : Operand<i32> {
+ let ParserMatchClass = mrs_asmoperand;
+ let PrintMethod = "printMRSOperand";
+ let DecoderMethod = "DecodeMRSOperand";
+}
+
+def msr_asmoperand : AsmOperandClass {
+ let Name = "MSRWithReg";
+
+ // Note that SPSel is valid for both this and the pstate operands, but with
+ // different immediate encodings. This is why these operands provide a string
+ // AArch64Operand rather than an immediate. The overlap is small enough that
+ // it could be resolved with hackery now, but who can say in future?
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def msr_op : Operand<i32> {
+ let ParserMatchClass = msr_asmoperand;
+ let PrintMethod = "printMSROperand";
+ let DecoderMethod = "DecodeMSROperand";
+}
+
+def pstate_asmoperand : AsmOperandClass {
+ let Name = "MSRPState";
+ // See comment above about parser.
+ let ParserMethod = "ParseSysRegOperand";
+ let DiagnosticType = "MSR";
+}
+
+def pstate_op : Operand<i32> {
+ let ParserMatchClass = pstate_asmoperand;
+ let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
+ let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
+}
+
+// When <CRn> is specified, an assembler should accept something like "C4", not
+// the usual "#4" immediate.
+def CRx_asmoperand : AsmOperandClass {
+ let Name = "CRx";
+ let PredicateMethod = "isUImm<4>";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "ParseCRxOperand";
+ // Diagnostics are handled in all cases by ParseCRxOperand.
+}
+
+def CRx : Operand<i32> {
+ let ParserMatchClass = CRx_asmoperand;
+ let PrintMethod = "printCRxOperand";
+}
+
+
+// Finally, we can start defining the instructions.
+
+// HINT is straightforward, with a few aliases.
+def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
+ [], NoItinerary> {
+ bits<7> UImm7;
+ let CRm = UImm7{6-3};
+ let Op2 = UImm7{2-0};
+
+ let Op0 = 0b00;
+ let Op1 = 0b011;
+ let CRn = 0b0010;
+ let Rt = 0b11111;
+}
+
+def : InstAlias<"nop", (HINTi 0)>;
+def : InstAlias<"yield", (HINTi 1)>;
+def : InstAlias<"wfe", (HINTi 2)>;
+def : InstAlias<"wfi", (HINTi 3)>;
+def : InstAlias<"sev", (HINTi 4)>;
+def : InstAlias<"sevl", (HINTi 5)>;
+
+// Quite a few instructions then follow a similar pattern of fixing common
+// fields in the bitpattern, so we'll define a helper class for them.
+class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
+ Operand operand, string asmop>
+ : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
+ [], NoItinerary> {
+ let Op0 = op0;
+ let Op1 = op1;
+ let CRn = crn;
+ let Op2 = op2;
+ let Rt = 0b11111;
+}
+
+
+def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
+def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
+def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
+def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
+
+def : InstAlias<"clrex", (CLREXi 0b1111)>;
+def : InstAlias<"isb", (ISBi 0b1111)>;
+
+// (DMBi 0xb) is a "DMB ISH" instruction, appropriate at least for Linux SMP
+// configurations.
+def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
+
+// Any SYS bitpattern can be represented with a complex and opaque "SYS"
+// instruction.
+def SYSiccix : A64I_system<0b0, (outs),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
+ uimm3:$Op2, GPR64:$Rt),
+ "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The Xt argument of the generic SYS instruction can be omitted, whether or
+// not doing so makes sense for the operation in question.
+def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
+ (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
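+
+// For example (illustrative only), "sys #0, c7, c5, #0" is the generic
+// spelling of "ic iallu"; the named forms are handled by the aliases below.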
+
+
+// Many SYS bitpatterns also have aliases (IC, DC, AT, TLBI), which obviously
+// don't fit into the generic scheme above.
+class SYSalias<dag ins, string asmstring>
+ : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
+ let isAsmParserOnly = 1;
+
+ bits<14> SysOp;
+ let Op0 = 0b01;
+ let Op1 = SysOp{13-11};
+ let CRn = SysOp{10-7};
+ let CRm = SysOp{6-3};
+ let Op2 = SysOp{2-0};
+}
+
+def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
+
+def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
+def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
+
+def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
+
+def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
+ let Rt = 0b11111;
+}
+
+
+def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
+ (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
+ "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
+ [], NoItinerary> {
+ let Op0 = 0b01;
+}
+
+// The instructions themselves are rather simple for MSR and MRS.
+def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
+ "msr\t$SysReg, $Rt", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
+ "mrs\t$Rt, $SysReg", [], NoItinerary> {
+ bits<16> SysReg;
+ let Op0 = SysReg{15-14};
+ let Op1 = SysReg{13-11};
+ let CRn = SysReg{10-7};
+ let CRm = SysReg{6-3};
+ let Op2 = SysReg{2-0};
+}
+
+def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
+ "msr\t$PState, $CRm", [], NoItinerary> {
+ bits<6> PState;
+
+ let Op0 = 0b00;
+ let Op1 = PState{5-3};
+ let CRn = 0b0100;
+ let Op2 = PState{2-0};
+ let Rt = 0b11111;
+}
+
+//===----------------------------------------------------------------------===//
+// Test & branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: TBZ, TBNZ
+
+// The bit to test is a simple unsigned 6-bit immediate in the X-register
+// versions.
+def uimm6 : Operand<i64> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
+
+def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
+
+def tbimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
+
+ // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<14, 4>";
+ let ParserMatchClass = label_wid14_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
+def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
+
+// These instructions correspond to patterns involving "and" with a power of
+// two, which we need to be able to select.
+def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
+def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
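+
+// Illustratively (not part of the original patch), source along the lines of
+// "if ((x & 8) == 0)" produces exactly this kind of DAG and can be selected
+// to "tbz x0, #3, <label>" by the patterns below.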
+
+let isBranch = 1, isTerminator = 1 in {
+ def TBZxii : A64I_TBimm<0b0, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary>;
+
+ def TBNZxii : A64I_TBimm<0b1, (outs),
+ (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR64:$Rt, tstb64_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary>;
+
+
+  // Note that these instructions overlap with the 64-bit patterns above. This
+  // is intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" do the
+  // same thing and are both valid assembly. They also both have sensible DAG
+  // patterns.
+ def TBZwii : A64I_TBimm<0b0, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+ A64eq, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+
+ def TBNZwii : A64I_TBimm<0b1, (outs),
+ (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
+ "tbnz\t$Rt, $Imm, $Label",
+ [(A64br_cc (A64cmp (and GPR32:$Rt, tstb32_pat:$Imm), 0),
+ A64ne, bb:$Label)],
+ NoItinerary> {
+ let Imm{5} = 0b0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (immediate) instructions
+//===----------------------------------------------------------------------===//
+// Contains: B, BL
+
+def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
+
+def bimm_target : Operand<OtherVT> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+def blimm_target : Operand<i64> {
+ let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
+
+ // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
+ let PrintMethod = "printLabelOperand<26, 4>";
+ let ParserMatchClass = label_wid26_scal4_asmoperand;
+
+ let OperandType = "OPERAND_PCREL";
+}
+
+class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
+ : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
+ !strconcat(asmop, "\t$Label"), patterns,
+ NoItinerary>;
+
+let isBranch = 1 in {
+ def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ }
+
+ def BLimm : A64I_BimmImpl<0b1, "bl",
+ [(AArch64Call tglobaladdr:$Label)], blimm_target> {
+ let isCall = 1;
+ let Defs = [X30];
+ }
+}
+
+def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
+
+//===----------------------------------------------------------------------===//
+// Unconditional branch (register) instructions
+//===----------------------------------------------------------------------===//
+// Contains: BR, BLR, RET, ERET, DRP.
+
+// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
+// at the moment.
+class A64I_BregImpl<bits<4> opc,
+ dag outs, dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin = NoItinerary>
+ : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
+ outs, ins, asmstr, patterns, itin> {
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+// Note that these are not marked isCall or isReturn because as far as LLVM is
+// concerned they're not. "ret" is just another jump unless it has been selected
+// by LLVM as the function's return.
+
+let isBranch = 1 in {
+ def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
+ "br\t$Rn", [(brind GPR64:$Rn)]> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ }
+
+ def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
+ "blr\t$Rn", [(AArch64Call GPR64:$Rn)]> {
+ let isBarrier = 0;
+ let isCall = 1;
+ let Defs = [X30];
+ }
+
+ def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
+ "ret\t$Rn", []> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ // Create a separate pseudo-instruction for codegen to use so that we don't
+ // flag x30 as used in every function. It'll be restored before the RET by the
+ // epilogue if it's legitimately used.
+ def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
+ }
+
+ def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isReturn = 1;
+ }
+
+ def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
+ let Rn = 0b11111;
+ let isBarrier = 1;
+ }
+}
+
+def RETAlias : InstAlias<"ret", (RETx X30)>;
+
+
+//===----------------------------------------------------------------------===//
+// Address generation patterns
+//===----------------------------------------------------------------------===//
+
+// The primary method of address generation for the small/absolute memory model
+// is an ADRP/ADD pair:
+// ADRP x0, some_variable
+// ADD x0, x0, #:lo12:some_variable
+//
+// Folding the ADD into a load/store's addressing mode is accomplished during
+// addressing-mode selection. This just mops up the cases where that doesn't
+// work and we really need the address in a register.
+
+// This wrapper applies a LO12 modifier to the address. Otherwise we could just
+// use the same address.
+
+class ADRP_ADD<SDNode Wrapper, SDNode addrop>
+ : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
+ (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
+
+def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
+def : ADRP_ADD<A64WrapperSmall, texternalsym>;
+def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
+def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
+def : ADRP_ADD<A64WrapperSmall, tjumptable>;
+
+//===----------------------------------------------------------------------===//
+// GOT access patterns
+//===----------------------------------------------------------------------===//
+
+// FIXME: Wibble
+
+class GOTLoadSmall<SDNode addrfrag>
+ : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
+ (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
+
+def : GOTLoadSmall<texternalsym>;
+def : GOTLoadSmall<tglobaladdr>;
+def : GOTLoadSmall<tglobaltlsaddr>;
+
+//===----------------------------------------------------------------------===//
+// Tail call handling
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
+ def TC_RETURNdi
+ : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
+
+ def TC_RETURNxi
+ : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
+ [(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff))]>;
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ Uses = [XSP] in {
+ def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
+ (Bimm bimm_target:$Label)>;
+
+ def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
+ (BRx GPR64:$Rd)>;
+}
+
+
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+//===----------------------------------------------------------------------===//
+// Thread local storage
+//===----------------------------------------------------------------------===//
+
+// This is a pseudo-instruction representing the ".tlsdesccall" directive in
+// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
+// current location. It should always be immediately followed by a BLR
+// instruction, and is intended solely for relaxation by the linker.
+
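+// 0xde82 is the system-register encoding of TPIDR_EL0 (op0=3, op1=3, CRn=13,
+// CRm=0, op2=2), the EL0 thread pointer.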
+def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
+
+def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
+ let hasSideEffects = 1;
+}
+
+def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
+ [(A64tlsdesc_blr GPR64:$Rn, tglobaltlsaddr:$Var)]> {
+ let isCall = 1;
+ let Defs = [X30];
+}
+
+def : Pat<(A64tlsdesc_blr GPR64:$Rn, texternalsym:$Var),
+ (TLSDESC_BLRx GPR64:$Rn, texternalsym:$Var)>;
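+
+// As a rough illustration (not part of the original patch), the
+// TLS-descriptor call sequence these definitions support looks like:
+//     adrp x0, :tlsdesc:var
+//     ldr  x1, [x0, #:tlsdesc_lo12:var]
+//     add  x0, x0, #:tlsdesc_lo12:var
+//     .tlsdesccall var
+//     blr  x1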
+
+//===----------------------------------------------------------------------===//
+// Bitfield patterns
+//===----------------------------------------------------------------------===//
+
+def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+}]>;
+
+def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+}]>;
+
+def bfi_width_to_imms : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+}]>;
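+
+// These transforms map the (lsb, width) form used by A64Bfi onto the BFM
+// fields: ImmR = (regsize - lsb) % regsize and ImmS = width - 1. For example
+// (illustrative), "bfi w0, w1, #8, #4" is encoded with ImmR = (32 - 8) % 32 =
+// 24 and ImmS = 4 - 1 = 3.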
+
+
+// The simpler patterns deal with cases where no AND mask is actually needed
+// (either all bits are used or the low 32 bits are used).
+let AddedComplexity = 10 in {
+
+def : Pat<(A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIxxii GPR64:$src, GPR64:$Rn,
+ (bfi64_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+def : Pat<(A64Bfi GPR32:$src, GPR32:$Rn, imm:$ImmR, imm:$ImmS),
+ (BFIwwii GPR32:$src, GPR32:$Rn,
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS)))>;
+
+
+def : Pat<(and (A64Bfi GPR64:$src, GPR64:$Rn, imm:$ImmR, imm:$ImmS),
+ (i64 4294967295)),
+ (SUBREG_TO_REG (i64 0),
+ (BFIwwii (EXTRACT_SUBREG GPR64:$src, sub_32),
+ (EXTRACT_SUBREG GPR64:$Rn, sub_32),
+ (bfi32_lsb_to_immr (i64 imm:$ImmR)),
+ (bfi_width_to_imms (i64 imm:$ImmS))),
+ sub_32)>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous patterns
+//===----------------------------------------------------------------------===//
+
+// Truncation from 64 to 32-bits just involves renaming your register.
+def : Pat<(i32 (trunc (i64 GPR64:$val))), (EXTRACT_SUBREG GPR64:$val, sub_32)>;
+
+// Similarly, extension where we don't care about the high bits is
+// just a rename.
+def : Pat<(i64 (anyext (i32 GPR32:$val))),
+ (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$val, sub_32)>;
+
+// SELECT instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : PseudoInst<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
+ [(set FPR128:$Rd, (simple_select (f128 FPR128:$Rn),
+ FPR128:$Rm))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Load/store patterns
+//===----------------------------------------------------------------------===//
+
+// There are lots of patterns here, because we need to allow at least three
+// parameters to vary independently.
+// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
+// 2. LLVM source: zextloadi8, anyextloadi8, ...
+// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+//
+// The biggest problem turns out to be the address-generation variable. At the
+// point of instantiation we need to produce two DAGs, one for the pattern and
+// one for the instruction. Doing this at the lowest level of classes doesn't
+// work.
+//
+// Consider the simple uimm12 addressing mode, and the desire to match both (add
+// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
+// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
+// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
+// operation, and PatFrags are for selection not output.
+//
+// As a result, the address-generation patterns are the final instantiations.
+// However, we do still need to vary the operand for the address further down
+// (at the point we're deciding on A64WrapperSmall, we don't know the memory
+// width of the operation).
+
+//===------------------------------
+// 1. Basic infrastructural defs
+//===------------------------------
+
+// First, some simple classes for !foreach and !subst to use:
+class Decls {
+ dag pattern;
+}
+
+def decls : Decls;
+def ALIGN;
+def INST;
+def OFFSET;
+def SHIFT;
+
+// You can't use !subst on an actual immediate, but you *can* use it on an
+// operand record that happens to match a single immediate. So we do.
+def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
+def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
+def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
+def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
+def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
+
+// If the low bits of a pointer are known to be 0 then an "or" is just as good
+// as addition for computing an offset. This fragment forwards that check for
+// TableGen's use.
+def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
+[{
+ return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
+}]>;
+
+// Load/store (unsigned immediate) operations with relocations against global
+// symbols (for lo12) are only valid if those symbols have correct alignment
+// (since the immediate offset is divided by the access scale, it can't have a
+// remainder).
+//
+// The guaranteed alignment is provided as part of the WrapperSmall
+// operation, and checked against one of these.
+def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
+def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
+def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
+def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
+def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
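+
+// For example, an 8-byte LDR scales its 12-bit immediate by 8, so a :lo12:
+// relocation is only representable if the low 12 bits of the symbol's address
+// are a multiple of 8; min_align8 records exactly that guarantee from the
+// wrapper node.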
+
+// "Normal" load/store instructions can be used on atomic operations, provided
+// the ordering parameter is at most "monotonic". Anything above that needs
+// special handling with acquire/release instructions.
+class simple_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_load_simple_i8 : simple_load<atomic_load_8>;
+def atomic_load_simple_i16 : simple_load<atomic_load_16>;
+def atomic_load_simple_i32 : simple_load<atomic_load_32>;
+def atomic_load_simple_i64 : simple_load<atomic_load_64>;
+
+class simple_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
+}]>;
+
+def atomic_store_simple_i8 : simple_store<atomic_store_8>;
+def atomic_store_simple_i16 : simple_store<atomic_store_16>;
+def atomic_store_simple_i32 : simple_store<atomic_store_32>;
+def atomic_store_simple_i64 : simple_store<atomic_store_64>;
+
+//===------------------------------
+// 2. UImm12 and SImm9
+//===------------------------------
+
+// These instructions have two operands providing the address so they can be
+// treated similarly for most purposes.
+
+//===------------------------------
+// 2.1 Base patterns covering extend/truncate semantics
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
+ (STORE TPR:$Rt, Base, Offset)>;
+}
+
+// Instructions accessing a memory chunk smaller than a register (or, in a
+// pinch, the same size) have a characteristic set of patterns they want to
+// match: extending loads and truncating stores. This class deals with the
+// sign-neutral version of those patterns.
+//
+// It will be instantiated across multiple addressing-modes.
+multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset,
+ dag address, ValueType sty>
+ : ls_atomic_pats<LOAD, STORE, Base, Offset, address, GPR32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
+ (STORE GPR32:$Rt, Base, Offset)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
+ (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+}
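+
+// As a concrete instance: instantiated for i8 with the uimm12 addressing mode
+// below, the zextloadi8 pattern becomes (zextloadi8 (add GPR64xsp:$Rn,
+// byte_uimm12:$Off)) -> (LS8_LDR $Rn, $Off), i.e. a plain byte load whose
+// result is implicitly zero-extended in the w register.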
+
+// Next come patterns for sign-extending loads.
+multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
+
+}
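+
+// The instruction names here are assembled by string concatenation: T="B",
+// U="" picks the LDRSBw/LDRSBx records for uimm12 addressing, while the simm9
+// instantiation below passes U="_U" to pick the unscaled (LDUR-style) records
+// instead.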
+
+// And finally, "natural-width" loads and stores.
+multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset)>;
+ def : Pat<(store (sty TPR:$Rt), address), (STORE TPR:$Rt, Base, Offset)>;
+}
+
+// Integer operations also get atomic instructions to select for.
+multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag address, RegisterClass TPR,
+ ValueType sty>
+ : ls_neutral_pats<LOAD, STORE, Base, Offset, address, TPR, sty>,
+ ls_atomic_pats<LOAD, STORE, Base, Offset, address, TPR, sty>;
+
+//===------------------------------
+// 2.2. Addressing-mode instantiations
+//===------------------------------
+
+multiclass uimm12_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+ defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+ defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ i32>;
+
+ defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ GPR32, i32>;
+
+ defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ GPR64, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ FPR16, f16>;
+
+ defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern))),
+ FPR32, f32>;
+
+ defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, dword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, dword_uimm12,
+ !subst(ALIGN, min_align8, decls.pattern))),
+ FPR64, f64>;
+
+ defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, qword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, qword_uimm12,
+ !subst(ALIGN, min_align16, decls.pattern))),
+ FPR128, f128>;
+
+ defm : load_signed_pats<"B", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, byte_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, byte_uimm12,
+ !subst(ALIGN, any_align, decls.pattern))),
+ i8>;
+
+ defm : load_signed_pats<"H", "", Base,
+ !foreach(decls.pattern, Offset,
+ !subst(OFFSET, hword_uimm12, decls.pattern)),
+ !foreach(decls.pattern, address,
+ !subst(OFFSET, hword_uimm12,
+ !subst(ALIGN, min_align2, decls.pattern))),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(OFFSET, word_uimm12,
+ !subst(ALIGN, min_align4, decls.pattern)))),
+ (LDRSWx Base, !foreach(decls.pattern, Offset,
+ !subst(OFFSET, word_uimm12, decls.pattern)))>;
+}
+
+// Straightforward patterns of last resort: a pointer with or without an
+// appropriate offset.
+defm : uimm12_pats<(i64 GPR64xsp:$Rn), (i64 GPR64xsp:$Rn), (i64 0)>;
+defm : uimm12_pats<(add GPR64xsp:$Rn, OFFSET:$UImm12),
+ (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// The offset could be hidden behind an "or", of course:
+defm : uimm12_pats<(add_like_or GPR64xsp:$Rn, OFFSET:$UImm12),
+ (i64 GPR64xsp:$Rn), (i64 OFFSET:$UImm12)>;
+
+// Global addresses under the small-absolute model should use these
+// instructions. There are ELF relocations specifically for it.
+defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
+ (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
+ ALIGN),
+ (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
+
+// External symbols that make it this far should also get standard relocations.
+defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
+ ALIGN),
+ (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
+
+defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+ (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
+// We also want to use uimm12 instructions for local variables at the moment.
+def tframeindex_XFORM : SDNodeXForm<frameindex, [{
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->getTargetFrameIndex(FI, MVT::i64);
+}]>;
+
+defm : uimm12_pats<(i64 frameindex:$Rn),
+ (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
+
+// These can be much simpler than uimm12 because we don't need to change the
+// operand type (e.g. LDURB and LDURH take the same operands).
+multiclass simm9_pats<dag address, dag Base, dag Offset> {
+ defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
+ defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
+
+ defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address,
+ GPR32, i32>;
+ defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address,
+ GPR64, i64>;
+
+ defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address,
+ FPR16, f16>;
+ defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address,
+ FPR32, f32>;
+ defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address,
+ FPR64, f64>;
+ defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
+ FPR128, f128>;
+
+ def : Pat<(i64 (zextloadi32 address)),
+ (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
+
+ def : Pat<(truncstorei32 GPR64:$Rt, address),
+ (LS32_STUR (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset)>;
+
+ defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
+ defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
+ def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
+}
+
+defm : simm9_pats<(add GPR64xsp:$Rn, simm9:$SImm9),
+ (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
+
+defm : simm9_pats<(add_like_or GPR64xsp:$Rn, simm9:$SImm9),
+ (i64 GPR64xsp:$Rn), (SDXF_simm9 simm9:$SImm9)>;
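+
+// These unscaled LDUR/STUR forms take a signed 9-bit byte offset, so (for
+// example) an i32 load from (add $Rn, -3) can become "ldur w0, [x1, #-3]",
+// something the scaled uimm12 forms above cannot encode.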
+
+
+//===------------------------------
+// 3. Register offset patterns
+//===------------------------------
+
+// Atomic patterns can be shared between integer operations of all sizes, so a
+// quick multiclass here allows reuse.
+multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
+ dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty> {
+ def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, TPR:$Rt),
+ (STORE TPR:$Rt, Base, Offset, Extend)>;
+}
+
+// The register offset instructions take three operands to form the address,
+// and have an annoying split between instructions where Rm is 32-bit and
+// 64-bit. So we need a special hierarchy to describe them. Other than that,
+// the same operations should be supported as for simm9 and uimm12 addressing.
+
+multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty>
+ : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, GPR32, sty> {
+ def : Pat<(!cast<SDNode>(zextload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ def : Pat<(!cast<SDNode>(extload # sty) address),
+ (LOAD Base, Offset, Extend)>;
+
+ // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
+ // register was actually set.
+ def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
+ (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
+
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR32:$Rt, address),
+ (STORE GPR32:$Rt, Base, Offset, Extend)>;
+
+ // For truncating store from 64-bits, we have to manually tell LLVM to
+ // ignore the high bits of the x register.
+ def : Pat<(!cast<SDNode>(truncstore # sty) GPR64:$Rt, address),
+ (STORE (EXTRACT_SUBREG GPR64:$Rt, sub_32), Base, Offset, Extend)>;
+
+}
+
+// Next come patterns for sign-extending loads.
+multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+ dag address, ValueType sty> {
+ def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+
+ def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+ (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+// And finally, "natural-width" loads and stores.
+multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty> {
+ def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
+ def : Pat<(store (sty TPR:$Rt), address),
+ (STORE TPR:$Rt, Base, Offset, Extend)>;
+}
+
+multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
+ dag Base, dag Offset, dag Extend, dag address,
+ RegisterClass TPR, ValueType sty>
+ : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>,
+ ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, TPR, sty>;
+
+multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
+ dag Extend> {
+ defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+ defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+ defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ GPR32, i32>;
+
+ defm : ro_int_neutral_pats<
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ GPR64, i64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ FPR16, f16>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern)),
+ FPR32, f32>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq3, decls.pattern)),
+ FPR64, f64>;
+
+ defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
+ !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
+ Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq4, decls.pattern)),
+ FPR128, f128>;
+
+ defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq0, decls.pattern)),
+ i8>;
+
+ defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
+ !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq1, decls.pattern)),
+ i16>;
+
+ def : Pat<(sextloadi32 !foreach(decls.pattern, address,
+ !subst(SHIFT, imm_eq2, decls.pattern))),
+ (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
+ Base, Offset, Extend)>;
+}
+
+
+// Finally we're in a position to tell LLVM exactly what addresses are reachable
+// using register-offset instructions. Essentially a base plus a possibly
+// extended, possibly shifted (by access size) offset.
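+//
+// For example, the sext+shl form below corresponds to an address like
+// "[x1, w2, sxtw #2]" for a 32-bit access: the 32-bit index register is
+// sign-extended to 64 bits and scaled by the access size before being added
+// to the base.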
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (sext GPR32:$Rm)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 6)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (sext GPR32:$Rm), SHIFT)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 7)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (zext GPR32:$Rm)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Wm", (add GPR64xsp:$Rn, (shl (zext GPR32:$Rm), SHIFT)),
+ (i64 GPR64xsp:$Rn), (i32 GPR32:$Rm), (i64 3)>;
+
+defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, GPR64:$Rm),
+ (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 2)>;
+
+defm : regoff_pats<"Xm", (add GPR64xsp:$Rn, (shl GPR64:$Rm, SHIFT)),
+ (i64 GPR64xsp:$Rn), (i64 GPR64:$Rm), (i64 3)>;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
new file mode 100644
index 0000000..c96bf85
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -0,0 +1,140 @@
+//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AArch64 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64AsmPrinter.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+MCOperand
+AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Sym) const {
+ const MCExpr *Expr = 0;
+
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+
+ switch (MO.getTargetFlags()) {
+ case AArch64II::MO_GOT:
+ Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOT_LO12:
+ Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_LO12:
+ Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G1:
+ Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_DTPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL:
+ Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
+ break;
+ case AArch64II::MO_GOTTPREL_LO12:
+ Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC:
+ Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
+ break;
+ case AArch64II::MO_TLSDESC_LO12:
+ Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G1:
+ Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
+ break;
+ case AArch64II::MO_TPREL_G0_NC:
+ Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_NO_FLAG:
+ // Expr is already correct
+ break;
+ default:
+ llvm_unreachable("Unexpected MachineOperand flag");
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
+ return MCOperand::CreateExpr(Expr);
+}
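+
+// As a rough illustration of what these wrappers achieve: an operand carrying
+// the MO_LO12 flag on a global "var" is ultimately printed as ":lo12:var",
+// which is the form the ADRP + ADD/LDR :lo12: sequences used for
+// small-code-model addressing expect.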
+
+bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers
+ return false;
+
+ }
+
+ return true;
+}
+
+void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI,
+ AArch64AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
new file mode 100644
index 0000000..f45d8f7
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -0,0 +1,18 @@
+//===-- AArch64MachineFunctionInfo.cpp - AArch64 machine function info ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just contains the anchor for the AArch64MachineFunctionInfo to
+// force vtable emission.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
new file mode 100644
index 0000000..33da54f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -0,0 +1,149 @@
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AArch64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64MACHINEFUNCTIONINFO_H
+#define AARCH64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private AArch64
+/// target-specific information for each MachineFunction.
+class AArch64MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// Number of bytes of arguments this function has on the stack. If the callee
+ /// is expected to restore the argument stack this should be a multiple of 16,
+ /// all usable during a tail call.
+ ///
+ /// The alternative would forbid tail call optimisation in some cases: if we
+ /// want to transfer control from a function with 8-bytes of stack-argument
+ /// space to a function with 16-bytes then misalignment of this value would
+ /// make a stack adjustment necessary, which could not be undone by the
+ /// callee.
+ unsigned BytesInStackArgArea;
+
+ /// The number of bytes to restore to deallocate space for incoming
+ /// arguments. Canonically 0 in the C calling convention, but non-zero when
+ /// callee is expected to pop the args.
+ unsigned ArgumentStackToRestore;
+
+ /// If the stack needs to be adjusted on frame entry in two stages, this
+ /// records the size of the first adjustment just prior to storing
+ /// callee-saved registers. The callee-saved slots are addressed assuming
+ /// SP == <incoming-SP> - InitialStackAdjust.
+ unsigned InitialStackAdjust;
+
+ /// Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
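+
+  // A rough sketch for orientation: the AAPCS64 va_list is
+  //   struct { void *__stack; void *__gr_top; void *__vr_top;
+  //            int __gr_offs; int __vr_offs; };
+  // the frame indices and sizes below describe the register-save areas
+  // backing __gr_top/__vr_top and the __stack starting point.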
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// general-purpose registers that might contain variadic parameters.
+ int VariadicGPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the general-purpose registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicGPRIdx to what's stored in __gr_top.
+ unsigned VariadicGPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of the area where LowerFormalArguments puts the
+ /// floating-point registers that might contain variadic parameters.
+ int VariadicFPRIdx;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The size of the frame object used to store the floating-point registers
+ /// which might contain variadic arguments. This is the offset from
+ /// VariadicFPRIdx to what's stored in __vr_top.
+ unsigned VariadicFPRSize;
+
+ /// @see AArch64 Procedure Call Standard, B.3
+ ///
+ /// The Frame index of an object pointing just past the last known stacked
+ /// argument on entry to a variadic function. This goes into the __stack field
+ /// of the va_list type.
+ int VariadicStackIdx;
+
+ /// The offset of the frame pointer from the stack pointer on function
+ /// entry. This is expected to be negative.
+ int FramePointerOffset;
+
+public:
+ AArch64MachineFunctionInfo()
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ explicit AArch64MachineFunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0),
+ ArgumentStackToRestore(0),
+ InitialStackAdjust(0),
+ NumLocalDynamics(0),
+ VariadicGPRIdx(0),
+ VariadicGPRSize(0),
+ VariadicFPRIdx(0),
+ VariadicFPRSize(0),
+ VariadicStackIdx(0),
+ FramePointerOffset(0) {}
+
+ unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
+ void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;}
+
+ unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
+ void setArgumentStackToRestore(unsigned bytes) {
+ ArgumentStackToRestore = bytes;
+ }
+
+ unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
+ void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+
+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ int getVariadicGPRIdx() const { return VariadicGPRIdx; }
+ void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+
+ unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
+ void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+
+ int getVariadicFPRIdx() const { return VariadicFPRIdx; }
+ void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+
+ unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
+ void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+
+ int getVariadicStackIdx() const { return VariadicStackIdx; }
+ void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+
+ int getFramePointerOffset() const { return FramePointerOffset; }
+ void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
new file mode 100644
index 0000000..20b0dcf
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -0,0 +1,171 @@
+//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64RegisterInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+using namespace llvm;
+
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti)
+ : AArch64GenRegisterInfo(AArch64::X30), TII(tii) {
+}
+
+const uint16_t *
+AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ return CSR_PCS_SaveList;
+}
+
+const uint32_t*
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return CSR_PCS_RegMask;
+}
+
+const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
+ return TLSDesc_RegMask;
+}
+
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::FlagClassRegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+}
+
+
+
+BitVector
+AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ Reserved.set(AArch64::XSP);
+ Reserved.set(AArch64::WSP);
+
+ Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WZR);
+
+ if (TFI->hasFP(MF)) {
+ Reserved.set(AArch64::X29);
+ Reserved.set(AArch64::W29);
+ }
+
+ return Reserved;
+}
+
+void
+AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
+ int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FrameLowering *TFI =
+ static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
+
+ // In order to work out the base and offset for addressing, the FrameLowering
+ // code needs to know (sometimes) whether the instruction is storing/loading a
+ // callee-saved register, or whether it's a more generic
+ // operation. Fortunately the frame indices are used *only* for that purpose
+ // and are contiguous, so we can check here.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ int MinCSFI = 0;
+ int MaxCSFI = -1;
+
+ if (CSI.size()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ }
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+
+ unsigned FrameReg;
+ int64_t Offset;
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
+ IsCalleeSaveOp);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+
+ // DBG_VALUE instructions have no real restrictions so they can be handled
+ // easily.
+ if (MI.isDebugValue()) {
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ int MinOffset, MaxOffset, OffsetScale;
+ if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) {
+ MinOffset = 0;
+ MaxOffset = 0xfff;
+ OffsetScale = 1;
+ } else {
+ // Load/store of a stack object
+ TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
+ }
+
+ // The frame lowering has told us a base and offset it thinks we should use to
+ // access this variable, but it's still up to us to make sure the values are
+ // legal for the instruction in question.
+ if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) {
+ unsigned BaseReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
+ BaseReg, FrameReg, BaseReg, Offset);
+ FrameReg = BaseReg;
+ Offset = 0;
+ }
+
+  // Negative offsets are expected if we address from FP, but for now this
+  // checks that nothing has gone horribly wrong.
+ assert(Offset >= 0 && "Unexpected negative offset from SP");
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+}
+
+unsigned
+AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
+ return AArch64::X29;
+ else
+ return AArch64::XSP;
+}
+
+bool
+AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const AArch64FrameLowering *AFI
+ = static_cast<const AArch64FrameLowering*>(TFI);
+ return AFI->useFPForAddressing(MF);
+}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
new file mode 100644
index 0000000..bb64fd5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -0,0 +1,76 @@
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
+#define LLVM_TARGET_AARCH64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "AArch64GenRegisterInfo.inc"
+
+namespace llvm {
+
+class AArch64InstrInfo;
+class AArch64Subtarget;
+
+struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
+private:
+ const AArch64InstrInfo &TII;
+
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo &tii,
+ const AArch64Subtarget &sti);
+
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+
+ const uint32_t *getTLSDescCallPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *Rs = NULL) const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns the original class if it is
+  /// possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getLargestLegalSuperClass - Returns the largest super class of RC that is
+ /// legal to use in the current sub-target and has the same spill size.
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::tcGPR64RegClass)
+ return &AArch64::GPR64RegClass;
+
+ return RC;
+ }
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
new file mode 100644
index 0000000..bd79546
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -0,0 +1,203 @@
+//===- AArch64RegisterInfo.td - AArch64 Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations that describe the AArch64 register file
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AArch64" in {
+def sub_128 : SubRegIndex;
+def sub_64 : SubRegIndex;
+def sub_32 : SubRegIndex;
+def sub_16 : SubRegIndex;
+def sub_8 : SubRegIndex;
+
+// The VPR registers are handled as sub-registers of FPR equivalents, but
+// they're really the same thing. We give this concept a special index.
+def sub_alias : SubRegIndex;
+}
+
+// Registers are identified with 5-bit ID numbers.
+class AArch64Reg<bits<16> enc, string n> : Register<n> {
+ let HWEncoding = enc;
+ let Namespace = "AArch64";
+}
+
+class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
+ list<SubRegIndex> inds = []>
+ : AArch64Reg<enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = inds;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-30 in {
+ def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+}
+
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">;
+
+// Could be combined with previous loop, but this way leaves w and x registers
+// consecutive as LLVM register numbers, which makes for easier debugging.
+foreach Index = 0-30 in {
+ def X#Index : AArch64RegWithSubs<Index, "x"#Index,
+ [!cast<Register>("W"#Index)], [sub_32]>,
+ DwarfRegNum<[Index]>;
+}
+
+def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
+def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+
+// Most instructions treat register 31 as zero for reads and a black-hole for
+// writes.
+
+// Note that the order of registers is important for the Disassembler here:
+// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
+// take an encoding value.
+def GPR32 : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WZR)> {
+}
+
+def GPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XZR)> {
+}
+
+def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
+ (sequence "W%u", 0, 30)> {
+}
+
+def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
+ (sequence "X%u", 0, 30)> {
+}
+
+// For tail calls, we can't use callee-saved registers or the structure-return
+// register, as they are supposed to be live across function calls and may be
+// clobbered by the epilogue.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 7),
+ (sequence "X%u", 9, 18))> {
+}
+
+
+// Certain addressing-useful instructions accept sp directly. Again the order of
+// registers is important to the Disassembler.
+def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
+ (add (sequence "W%u", 0, 30), WSP)> {
+}
+
+def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 30), XSP)> {
+}
+
+// Some aliases *only* apply to SP (e.g. MOV uses a different encoding for SP
+// and non-SP variants). We can't use a bare register in those patterns because
+// TableGen doesn't like it, so we need a class containing just stack registers.
+def Rxsp : RegisterClass<"AArch64", [i64], 64,
+ (add XSP)> {
+}
+
+def Rwsp : RegisterClass<"AArch64", [i32], 32,
+ (add WSP)> {
+}
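+
+// For example, "mov sp, x0" is an alias of "add sp, x0, #0", whereas a normal
+// register-to-register MOV is an alias of ORR with the zero register, so the
+// SP forms genuinely need these dedicated single-register classes.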
+
+//===----------------------------------------------------------------------===//
+// Scalar registers in the vector unit:
+// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
+//===----------------------------------------------------------------------===//
+
+foreach Index = 0-31 in {
+ def B # Index : AArch64Reg< Index, "b" # Index>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def H # Index : AArch64RegWithSubs<Index, "h" # Index,
+ [!cast<Register>("B" # Index)], [sub_8]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def S # Index : AArch64RegWithSubs<Index, "s" # Index,
+ [!cast<Register>("H" # Index)], [sub_16]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def D # Index : AArch64RegWithSubs<Index, "d" # Index,
+ [!cast<Register>("S" # Index)], [sub_32]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+
+ def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
+ [!cast<Register>("D" # Index)], [sub_64]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+
+def FPR8 : RegisterClass<"AArch64", [i8], 8,
+ (sequence "B%u", 0, 31)> {
+}
+
+def FPR16 : RegisterClass<"AArch64", [f16], 16,
+ (sequence "H%u", 0, 31)> {
+}
+
+def FPR32 : RegisterClass<"AArch64", [f32], 32,
+ (sequence "S%u", 0, 31)> {
+}
+
+def FPR64 : RegisterClass<"AArch64", [f64], 64,
+ (sequence "D%u", 0, 31)> {
+}
+
+def FPR128 : RegisterClass<"AArch64", [f128], 128,
+ (sequence "Q%u", 0, 31)> {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector registers:
+//===----------------------------------------------------------------------===//
+
+// NEON registers simply specify the overall vector, and it's expected that
+// Instructions will individually specify the acceptable data layout. In
+// principle this leaves two approaches open:
+// + An operand, giving a single ADDvvv instruction (for example). This turns
+// out to be unworkable in the assembly parser (without every Instruction
+// having a "cvt" function, at least) because the constraints can't be
+// properly enforced. It also complicates specifying patterns since each
+// instruction will accept many types.
+// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
+// details about NEON registers, but simplifies most other details.
+//
+// The second approach was taken.
+
+foreach Index = 0-31 in {
+ def V # Index : AArch64RegWithSubs<Index, "v" # Index,
+ [!cast<Register>("Q" # Index)],
+ [sub_alias]>,
+ DwarfRegNum<[!add(Index, 64)]>;
+}
+
+// These two classes contain the same registers, which should be reasonably
+// sensible for MC and allocation purposes, but having both allows them to be
+// treated separately for things like stack spilling.
+def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64,
+ (sequence "V%u", 0, 31)>;
+
+def VPR128 : RegisterClass<"AArch64",
+ [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
+ (sequence "V%u", 0, 31)>;
+
+// Flags register
+def NZCV : Register<"nzcv"> {
+ let Namespace = "AArch64";
+}
+
+def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1;
+ let isAllocatable = 0;
+}
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
new file mode 100644
index 0000000..e17cdaa
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -0,0 +1,10 @@
+//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
new file mode 100644
index 0000000..6bbe075
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64SelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-selectiondag-info"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
+}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
new file mode 100644
index 0000000..d412ed2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -0,0 +1,32 @@
+//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AArch64 subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
+#define LLVM_AARCH64SELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class AArch64TargetMachine;
+
+class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ const AArch64Subtarget *Subtarget;
+public:
+ explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ ~AArch64SelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 0000000..d17b738
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,43 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
+ : AArch64GenSubtargetInfo(TT, CPU, FS)
+ , HasNEON(true)
+ , HasCrypto(true)
+ , TargetTriple(TT) {
+
+ ParseSubtargetFeatures(CPU, FS);
+}
+
+bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
+ Reloc::Model RelocM) const {
+ if (RelocM == Reloc::Static)
+ return false;
+
+ return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
new file mode 100644
index 0000000..2e9205f
--- /dev/null
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -0,0 +1,54 @@
+//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
+#define LLVM_TARGET_AARCH64_SUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "AArch64GenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+class GlobalValue;
+
+class AArch64Subtarget : public AArch64GenSubtargetInfo {
+protected:
+ bool HasNEON;
+ bool HasCrypto;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+public:
+  /// This constructor initializes the data members to match those
+  /// of the specified triple.
+ ///
+ AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
new file mode 100644
index 0000000..df599d5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -0,0 +1,81 @@
+//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the AArch64TargetMachine
+// methods. Principally just setting up the passes needed to generate correct
+// code on this architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAArch64Target() {
+ RegisterTargetMachine<AArch64TargetMachine> X(TheAArch64Target);
+}
+
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ InstrInfo(Subtarget),
+ DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"),
+ TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
+}
+
+namespace {
+/// AArch64 Code Generator Pass Configuration Options.
+class AArch64PassConfig : public TargetPassConfig {
+public:
+ AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AArch64TargetMachine &getAArch64TargetMachine() const {
+ return getTM<AArch64TargetMachine>();
+ }
+
+ const AArch64Subtarget &getAArch64Subtarget() const {
+ return *getAArch64TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AArch64PassConfig(this, PM);
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ addPass(&UnpackMachineBundlesID);
+ addPass(createAArch64BranchFixupPass());
+ return true;
+}
+
+bool AArch64PassConfig::addInstSelector() {
+ addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+
+ // For ELF, cleanup any local-dynamic TLS accesses.
+ if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64CleanupLocalDynamicTLSPass());
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
new file mode 100644
index 0000000..c1f47c2
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -0,0 +1,69 @@
+//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETMACHINE_H
+#define LLVM_AARCH64TARGETMACHINE_H
+
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AArch64TargetMachine : public LLVMTargetMachine {
+ AArch64Subtarget Subtarget;
+ AArch64InstrInfo InstrInfo;
+ const DataLayout DL;
+ AArch64TargetLowering TLInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64FrameLowering FrameLowering;
+
+public:
+ AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ const AArch64InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ const DataLayout *getDataLayout() const { return &DL; }
+
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
new file mode 100644
index 0000000..b4452f5
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64 specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AArch64TargetObjectFile.h"
+
+using namespace llvm;
+
+void
+AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
new file mode 100644
index 0000000..bf0565a
--- /dev/null
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -0,0 +1,31 @@
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file deals with any AArch64-specific requirements on object files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+  /// AArch64LinuxTargetObjectFile - This implementation is used for Linux
+  /// AArch64.
+ class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
new file mode 100644
index 0000000..c1695da
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -0,0 +1,2188 @@
+//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the (GNU-style) assembly parser for the AArch64
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64Operand;
+
+class AArch64AsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+
+#define GET_ASSEMBLER_HEADER
+#include "AArch64GenAsmMatcher.inc"
+
+public:
+ enum AArch64MatchResultTy {
+ Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "AArch64GenAsmMatcher.inc"
+ };
+
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+  // These methods form the public interface of the MCTargetAsmParser.
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirectiveTLSDescCall(SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
+
+  // The rest of the sub-parsers have more freedom over their interface: they
+  // return an OperandMatchResultTy because it's less ambiguous than true/false
+  // or -1/0/1, even if it is more verbose.
+ OperandMatchResultTy
+ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+ OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
+
+ OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
+
+ OperandMatchResultTy
+ ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes);
+
+ OperandMatchResultTy
+ ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes);
+
+ OperandMatchResultTy
+ ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ template<typename SomeNamedImmMapper> OperandMatchResultTy
+ ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
+ }
+
+ OperandMatchResultTy
+ ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ OperandMatchResultTy
+ ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  /// Scan the next token (which had better be an identifier) and determine
+  /// whether it represents a general-purpose or vector register. It returns
+  /// true if a register was recognised and populates its reference arguments;
+  /// it does not consume the token.
+ bool
+ IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
+ SMLoc &LayoutLoc) const;
+
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Instances of this class represent a parsed AArch64 machine instruction.
+class AArch64Operand : public MCParsedAsmOperand {
+private:
+ enum KindTy {
+ k_ImmWithLSL, // #uimm {, LSL #amt }
+ k_CondCode, // eq/ne/...
+ k_FPImmediate, // Limited-precision floating-point imm
+ k_Immediate, // Including expressions referencing symbols
+ k_Register,
+ k_ShiftExtend,
+ k_SysReg, // The register operand of MRS and MSR instructions
+    k_Token, // The mnemonic and any other raw tokens the auto-generated matcher needs
+ k_WrappedRegister // Load/store exclusive permit a wrapped register.
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ } ImmWithLSL;
+
+ struct {
+ A64CC::CondCodes Code;
+ } CondCode;
+
+ struct {
+ double Val;
+ } FPImm;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ } ShiftExtend;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } SysReg;
+
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+ };
+
+ AArch64Operand(KindTy K, SMLoc S, SMLoc E)
+ : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+
+public:
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ }
+
+ SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getEndLoc() const { return EndLoc; }
+ void print(raw_ostream&) const;
+ void dump() const;
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_WrappedRegister)
+ && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == k_Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ A64CC::CondCodes getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
+ }
+
+ static bool isNonConstantExpr(const MCExpr *E,
+ AArch64MCExpr::VariantKind &Variant) {
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
+ Variant = A64E->getKind();
+ return true;
+ } else if (!isa<MCConstantExpr>(E)) {
+ Variant = AArch64MCExpr::VK_AARCH64_None;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isMem() const { return false; }
+ bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
+ bool isWrappedReg() const { return Kind == k_WrappedRegister; }
+
+ bool isAddSubImmLSL0() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 0) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ bool isAddSubImmLSL12() const {
+ if (!isImmWithLSL()) return false;
+ if (ImmWithLSL.ShiftAmount != 12) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
+ || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
+
+ template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
+ if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
+ return false;
+
+ if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
+ return false;
+
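+    // The only legal shift amounts are 0 and log2(access size), e.g. #3 for a
+    // 64-bit access.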
+ return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ }
+
+ bool isAdrpLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOT
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
+ }
+
+ return isLabel<21, 4096>();
+ }
+
+ template<unsigned RegWidth> bool isBitfieldWidth() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ template<int RegWidth>
+ bool isCVTFixedPos() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ }
+
+ bool isFMOVImm() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ return A64Imms::isFPImm(RealVal, ImmVal);
+ }
+
+ bool isFPZero() const {
+ if (!isFPImm()) return false;
+
+ APFloat RealVal(FPImm.Val);
+ return RealVal.isPosZero();
+ }
+
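+  // A constant label operand must be a multiple of 'scale' and, once scaled,
+  // fit in a signed 'field_width'-bit field; e.g. isLabel<19, 4> (load-literal)
+  // accepts multiples of 4 in [-1048576, 1048572].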
+ template<unsigned field_width, unsigned scale>
+ bool isLabel() const {
+ if (!isImm()) return false;
+
+ if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
+ return true;
+ } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = - (scale * (1LL << (field_width - 1)));
+ int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
+ return (Val % scale) == 0 && Val >= Min && Val <= Max;
+ }
+
+    // N.b. this disallows explicit relocation specifications via an
+    // AArch64MCExpr; users needing that behaviour will have to relax this
+    // check.
+    return false;
+ }
+
+ bool isLane1() const {
+ if (!isImm()) return false;
+
+ // Because it's come through custom assembly parsing, it must always be a
+ // constant expression.
+ return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ }
+
+ bool isLoadLitLabel() const {
+ if (!isImm()) return false;
+
+ AArch64MCExpr::VariantKind Variant;
+ if (isNonConstantExpr(getImm(), Variant)) {
+ return Variant == AArch64MCExpr::VK_AARCH64_None
+ || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ }
+
+ return isLabel<19, 4>();
+ }
+
+ template<unsigned RegWidth> bool isLogicalImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ if (!CE) return false;
+
+ uint32_t Bits;
+ return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ }
+
+ template<unsigned RegWidth> bool isLogicalImmMOV() const {
+ if (!isLogicalImm<RegWidth>()) return false;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ // The move alias for ORR is only valid if the immediate cannot be
+ // represented with a move (immediate) instruction; they take priority.
+ int UImm16, Shift;
+ return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
+ && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ }
+
+ template<int MemSize>
+ bool isOffsetUImm12() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ // Assume they know what they're doing for now if they've given us a
+ // non-constant expression. In principle we could check for ridiculous
+ // things that can't possibly work or relocations that would almost
+ // certainly break resulting code.
+ if (!CE)
+ return true;
+
+ int64_t Val = CE->getValue();
+
+ // Must be a multiple of the access size in bytes.
+ if ((Val & (MemSize - 1)) != 0) return false;
+
+ // Must be 12-bit unsigned
+ return Val >= 0 && Val <= 0xfff * MemSize;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
+ bool isShift() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ }
+
+ bool isMOVN32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVN64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+
+ bool isMOVZ32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVZ64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0,
+ AArch64MCExpr::VK_AARCH64_ABS_G1,
+ AArch64MCExpr::VK_AARCH64_ABS_G2,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_SABS_G0,
+ AArch64MCExpr::VK_AARCH64_SABS_G1,
+ AArch64MCExpr::VK_AARCH64_SABS_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G2,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G2,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK32Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(32, PermittedModifiers, NumModifiers);
+ }
+
+ bool isMOVK64Imm() const {
+ static AArch64MCExpr::VariantKind PermittedModifiers[] = {
+ AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
+ AArch64MCExpr::VK_AARCH64_ABS_G3,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
+ AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ };
+ unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+
+ return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ }
+
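+  // Common check for the MOVZ/MOVN/MOVK predicates above: either a plain
+  // 16-bit constant with an optional 'lsl #16*n' below the register width, or
+  // one of the permitted relocation modifiers (which must not carry an
+  // explicit shift, since the modifier already implies it).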
+ bool isMoveWideImm(unsigned RegWidth,
+ AArch64MCExpr::VariantKind *PermittedModifiers,
+ unsigned NumModifiers) const {
+ if (!isImmWithLSL()) return false;
+
+ if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
+ if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+
+ AArch64MCExpr::VariantKind Modifier;
+ if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
+ // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
+ if (!ImmWithLSL.ImplicitAmount) return false;
+
+ for (unsigned i = 0; i < NumModifiers; ++i)
+ if (PermittedModifiers[i] == Modifier) return true;
+
+ return false;
+ }
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
+ return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ }
+
+ template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
+ bool isMoveWideMovAlias() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int UImm16, Shift;
+ uint64_t Value = CE->getValue();
+
+    // If this is a 32-bit instruction then all bits above bit 31 should be the
+    // same: either all-zero or all-one is fine, because both signed and
+    // unsigned values should be permitted.
+ if (RegWidth == 32) {
+ if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ return false;
+
+ Value &= 0xffffffffULL;
+ }
+
+ return isValidImm(RegWidth, Value, UImm16, Shift);
+ }
+
+ bool isMSRWithReg() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMSRPState() const {
+ if (!isSysReg()) return false;
+
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isMRS() const {
+ if (!isSysReg()) return false;
+
+ // First check against specific MSR-only (write-only) registers
+ bool IsKnownRegister;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+
+ return IsKnownRegister;
+ }
+
+ bool isPRFM() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+
+ if (!CE)
+ return false;
+
+ return CE->getValue() >= 0 && CE->getValue() <= 31;
+ }
+
+ template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != SHKind)
+ return false;
+
+ return ShiftExtend.Amount <= 4;
+ }
+
+ bool isRegExtendLSL() const {
+ if (!isShiftOrExtend()) return false;
+
+ if (ShiftExtend.ShiftType != A64SE::LSL)
+ return false;
+
+ return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ }
+
+ template<int MemSize> bool isSImm7Scaled() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ int64_t Val = CE->getValue();
+ if (Val % MemSize != 0) return false;
+
+ Val /= MemSize;
+
+ return Val >= -64 && Val < 64;
+ }
+
+ template<int BitWidth>
+ bool isSImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= -(1LL << (BitWidth - 1))
+ && CE->getValue() < (1LL << (BitWidth - 1));
+ }
+
+ template<int bitWidth>
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ }
+
+ bool isUImm() const {
+ if (!isImm()) return false;
+
+ return isa<MCConstantExpr>(getImm());
+ }
+
+ static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
+ unsigned ShiftAmount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
+ Op->ImmWithLSL.Val = Val;
+ Op->ImmWithLSL.ShiftAmount = ShiftAmount;
+ Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
+ Op->CondCode.Code = Code;
+ return Op;
+ }
+
+ static AArch64Operand *CreateFPImm(double Val,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
+ Op->FPImm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
+ Op->Imm.Val = Val;
+ return Op;
+ }
+
+ static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
+ Op->Reg.RegNum = RegNum;
+ return Op;
+ }
+
+ static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
+ unsigned Amount,
+ bool ImplicitAmount,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
+ Op->ShiftExtend.ShiftType = ShiftTyp;
+ Op->ShiftExtend.Amount = Amount;
+ Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
+ return Op;
+ }
+
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+ static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ return Op;
+ }
+
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
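+  // The BFI/SBFIZ/UBFIZ aliases encode their 'lsb' operand as
+  // immr = (RegWidth - lsb) % RegWidth.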
+ template<unsigned RegWidth>
+ void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
+ Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ }
+
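+  // The 'width' operand of the same aliases is encoded as imms = width - 1.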
+ void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ }
+
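+  // The BFXIL/SBFX/UBFX aliases encode imms = lsb + width - 1; the lsb has
+  // already been added to the MCInst, so it is read back from there.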
+ void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+
+ Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ }
+
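+  // Fixed-point conversions encode '#fbits' as a scale field of 64 - fbits.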
+ void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ }
+
+ void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ APFloat RealVal(FPImm.Val);
+ uint32_t ImmVal;
+ A64Imms::isFPImm(RealVal, ImmVal);
+
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ }
+
+ void addFPZeroOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
+
+ void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Encoded = A64InvertCondCode(getCondCode());
+ Inst.addOperand(MCOperand::CreateImm(Encoded));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
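+  // Scaled 7-bit signed offsets (as used by the load/store-pair instructions)
+  // are divided by the access size and truncated to 7 bits here.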
+ template<int MemSize>
+ void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue() / MemSize;
+ Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ }
+
+ template<int BitWidth>
+ void addSImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Val = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ }
+
+ void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+ }
+
+ template<unsigned field_width, unsigned scale>
+ void addLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+
+ if (!CE) {
+ addExpr(Inst, Imm.Val);
+ return;
+ }
+
+ int64_t Val = CE->getValue();
+ assert(Val % scale == 0 && "Unaligned immediate in instruction");
+ Val /= scale;
+
+ Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ }
+
+ template<int MemSize>
+ void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
+ } else {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ }
+ }
+
+ template<unsigned RegWidth>
+ void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+
+ uint32_t Bits;
+ A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMRSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ bool Valid;
+ StringRef Name(SysReg.Data, SysReg.Length);
+ uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ }
+
+ void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ addExpr(Inst, ImmWithLSL.Val);
+
+ AArch64MCExpr::VariantKind Variant;
+ if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
+ Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
+ return;
+ }
+
+ // We know it's relocated
+ switch (Variant) {
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ Inst.addOperand(MCOperand::CreateImm(1));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ Inst.addOperand(MCOperand::CreateImm(2));
+ break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ Inst.addOperand(MCOperand::CreateImm(3));
+ break;
+ default: llvm_unreachable("Inappropriate move wide relocation");
+ }
+ }
+
+ template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
+ void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ int UImm16, Shift;
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+
+ if (RegWidth == 32) {
+ Value &= 0xffffffffULL;
+ }
+
+ bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
+ (void)Valid;
+ assert(Valid && "Invalid immediates should have been weeded out by now");
+
+ Inst.addOperand(MCOperand::CreateImm(UImm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ }
+
+ void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ assert(CE->getValue() >= 0 && CE->getValue() <= 31
+ && "PRFM operand should be 5-bits");
+
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ // For Add-sub (extended register) operands.
+ void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+
+ // For the extend in load-store (register offset) instructions.
+ template<unsigned MemSize>
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
+ addAddrRegExtendOperands(Inst, N, MemSize);
+ }
+
+ void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
+ unsigned MemSize) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+    // The first bit of Option is set by the instruction classes; the high two
+    // bits are determined as follows:
+ unsigned OptionHi = 0;
+ switch (ShiftExtend.ShiftType) {
+ case A64SE::UXTW:
+ case A64SE::LSL:
+ OptionHi = 1;
+ break;
+ case A64SE::SXTW:
+ case A64SE::SXTX:
+ OptionHi = 3;
+ break;
+ default:
+ llvm_unreachable("Invalid extend type for register offset");
+ }
+
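+    // The S bit says whether the offset register is scaled by the access size;
+    // for byte accesses an explicit (necessarily zero) amount still sets it.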
+ unsigned S = 0;
+ if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
+ S = 1;
+ else if (MemSize != 1 && ShiftExtend.Amount != 0)
+ S = 1;
+
+ Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ }
+
+  void addShiftOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ }
+};
+
+} // end anonymous namespace.
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+
+ // See if the operand has a custom parser
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+ // It could either succeed, fail or just not care.
+ if (ResTy != MatchOperand_NoMatch)
+ return ResTy;
+
+ switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return MatchOperand_ParseFail;
+ case AsmToken::Identifier: {
+ // It might be in the LSL/UXTB family ...
+ OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // ... or it might be a register ...
+ uint32_t NumLanes = 0;
+ OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
+ assert(GotReg != MatchOperand_ParseFail
+ && "register parsing shouldn't partially succeed");
+
+ if (GotReg == MatchOperand_Success) {
+ if (Parser.getTok().is(AsmToken::LBrac))
+ return ParseNEONLane(Operands, NumLanes);
+ else
+ return MatchOperand_Success;
+ }
+
+ // ... or it might be a symbolish thing
+ }
+ // Fall through
+ case AsmToken::LParen: // E.g. (strcmp-4)
+ case AsmToken::Integer: // 1f, 2b labels
+ case AsmToken::String: // quoted labels
+ case AsmToken::Dot: // . is Current location
+ case AsmToken::Dollar: // $ is PC
+ case AsmToken::Colon: {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Hash: { // Immediates
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ SMLoc EndLoc;
+ const MCExpr *ImmVal = 0;
+ Parser.Lex();
+
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
+ return MatchOperand_Success;
+ }
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return ParseOperand(Operands, Mnemonic);
+ }
+ // The following will likely be useful later, but not in very early cases
+ case AsmToken::LCurly: // Weird SIMD lists
+ llvm_unreachable("Don't know how to deal with '{' in operand");
+ return MatchOperand_ParseFail;
+ }
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
+ if (getLexer().is(AsmToken::Colon)) {
+ AArch64MCExpr::VariantKind RefKind;
+
+ OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return MatchOperand_ParseFail;
+
+ ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
+ return MatchOperand_Success;
+ }
+
+ // No weird AArch64MCExpr prefix
+ return getParser().parseExpression(ExprVal)
+ ? MatchOperand_ParseFail : MatchOperand_Success;
+}
+
+// A lane attached to a NEON register is written "[N]", which should yield
+// three tokens: '[', N, ']'. A hash is not allowed to precede the immediate
+// here.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t NumLanes) {
+ SMLoc Loc = Parser.getTok().getLoc();
+
+ assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
+ Operands.push_back(AArch64Operand::CreateToken("[", Loc));
+ Parser.Lex(); // Eat '['
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "expected lane number");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().getIntVal() >= NumLanes) {
+ Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
+ getContext());
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat actual lane
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+
+
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex(); // Eat ']'
+
+ return MatchOperand_Success;
+}
+
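+// A relocation specifier is written ":name:" immediately before the
+// expression, e.g. "add x0, x0, #:lo12:sym"; both colons are consumed here.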
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
+ assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
+ .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
+ .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
+ .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
+ .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
+ .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
+ .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
+ .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
+ .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
+ .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
+ .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
+ .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
+ .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
+ .Default(AArch64MCExpr::VK_AARCH64_None);
+
+ if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
+ Error(Parser.getTok().getLoc(),
+ "expected relocation specifier in operand after ':'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat identifier
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(),
+ "expected ':' after relocation specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseImmWithLSLOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ const MCExpr *Imm;
+ if (ParseImmediate(Imm) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Eat ','
+ Parser.Lex();
+
+ // The optional operand must be "lsl #N" where N is non-negative.
+ if (Parser.getTok().is(AsmToken::Identifier)
+ && Parser.getTok().getIdentifier().lower() == "lsl") {
+ Parser.Lex();
+
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
+ }
+ }
+
+ int64_t ShiftAmount = Parser.getTok().getIntVal();
+
+ if (ShiftAmount < 0) {
+ Error(Parser.getTok().getLoc(), "positive shift amount required");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the number
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
+ false, S, E));
+ return MatchOperand_Success;
+}
+
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCondCodeOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+
+ StringRef Tok = Parser.getTok().getIdentifier();
+ A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+
+ if (CondCode == A64CC::Invalid)
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat condition code
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseCRxOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ std::string LowerTok = Parser.getTok().getIdentifier().lower();
+ StringRef Tok(LowerTok);
+ if (Tok[0] != 'c') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
+
+ const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
+
+ Parser.Lex();
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseFPImmOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // FIXME?: I want to live in a world where immediates must start with
+ // #. Please don't dash my hopes (well, do if you have a good reason).
+ if (Parser.getTok().isNot(AsmToken::Hash)) return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '#'
+
+ bool Negative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Negative = true;
+ Parser.Lex(); // Eat '-'
+ } else if (Parser.getTok().is(AsmToken::Plus)) {
+ Parser.Lex(); // Eat '+'
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Real)) {
+ Error(S, "Expected floating-point immediate");
+ return MatchOperand_ParseFail;
+ }
+
+ APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
+ if (Negative) RealVal.changeSign();
+ double DblVal = RealVal.convertToDouble();
+
+ Parser.Lex(); // Eat real number
+ SMLoc E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
+ return MatchOperand_Success;
+}
+
+
+// Automatically generated
+static unsigned MatchRegisterName(StringRef Name);
+
+bool
+AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout,
+ SMLoc &LayoutLoc) const {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.isNot(AsmToken::Identifier))
+ return false;
+
+ std::string LowerReg = Tok.getString().lower();
+ size_t DotPos = LowerReg.find('.');
+
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
+ SMLoc S = Tok.getLoc();
+ RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+
+ if (DotPos == StringRef::npos) {
+ Layout = StringRef();
+ } else {
+    // Everything afterwards needs to be a literal token, expected to be
+    // '.2d', '.b', etc. for vector registers.
+
+ // This StringSwitch validates the input and (perhaps more importantly)
+ // gives us a permanent string to use in the token (a pointer into LowerReg
+ // would go out of scope when we return).
+ LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
+ std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Default("");
+
+ if (Layout.size() == 0) {
+ // Malformed register
+ return false;
+ }
+ }
+
+ return true;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ uint32_t &NumLanes) {
+ unsigned RegNum;
+ StringRef Layout;
+ SMLoc RegEndLoc, LayoutLoc;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+
+ if (Layout.size() != 0) {
+ unsigned long long TmpLanes = 0;
+ llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
+ if (TmpLanes != 0) {
+ NumLanes = TmpLanes;
+ } else {
+ // If the number of lanes isn't specified explicitly, a valid instruction
+ // will have an element specifier and be capable of acting on the entire
+ // vector register.
+ switch (Layout.back()) {
+ default: llvm_unreachable("Invalid layout specifier");
+ case 'b': NumLanes = 16; break;
+ case 'h': NumLanes = 8; break;
+ case 's': NumLanes = 4; break;
+ case 'd': NumLanes = 2; break;
+ }
+ }
+
+ Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
+ }
+
+ Parser.Lex();
+ return MatchOperand_Success;
+}
+
+bool
+AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ // This callback is used for things like DWARF frame directives in
+ // assembly. They don't care about things like NEON layouts or lanes, they
+ // just want to be able to produce the DWARF register number.
+ StringRef LayoutSpec;
+ SMLoc RegEndLoc, LayoutLoc;
+ StartLoc = Parser.getTok().getLoc();
+
+ if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
+ return true;
+
+ Parser.Lex();
+ EndLoc = Parser.getTok().getLoc();
+
+ return false;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Since these operands occur in very limited circumstances, without
+ // alternatives, we actually signal an error if there is no match. If relaxing
+ // this, beware of unintended consequences: an immediate will be accepted
+ // during matching, no matter how it gets into the AArch64Operand.
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc S = Tok.getLoc();
+
+ if (Tok.is(AsmToken::Identifier)) {
+ bool ValidName;
+ uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+
+ if (!ValidName) {
+ Error(S, "operand specifier not recognised");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // We're done with the identifier. Eat it
+
+ SMLoc E = Parser.getTok().getLoc();
+ const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
+ Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
+ return MatchOperand_Success;
+ } else if (Tok.is(AsmToken::Hash)) {
+ Parser.Lex();
+
+ const MCExpr *ImmVal;
+ if (ParseImmediate(ImmVal) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
+ Error(S, "Invalid immediate for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
+ return MatchOperand_Success;
+ }
+
+ Error(S, "unexpected operand for instruction");
+ return MatchOperand_ParseFail;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseSysRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Any MSR/MRS operand will be an identifier, and we want to store it as some
+ // kind of string: SPSel is valid for two different forms of MSR with two
+ // different encodings. There's no collision at the moment, but the potential
+ // is there.
+ if (!Tok.is(AsmToken::Identifier)) {
+ return MatchOperand_NoMatch;
+ }
+
+ SMLoc S = Tok.getLoc();
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
+ Parser.Lex(); // Eat identifier
+
+ return MatchOperand_Success;
+}
+
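+// Address operand for the load/store exclusive family: either "[xN]" or
+// "[xN, #0]", the only immediate offset those instructions accept.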
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseLSXAddressOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ unsigned RegNum;
+ SMLoc RegEndLoc, LayoutLoc;
+ StringRef Layout;
+  if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
+ || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
+ || Layout.size() != 0) {
+ // Check Layout.size because we don't want to let "x3.4s" or similar
+ // through.
+ return MatchOperand_NoMatch;
+ }
+ Parser.Lex(); // Eat register
+
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // We're done
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+ }
+
+ // Otherwise, only ", #0" is valid
+
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat ','
+
+ if (Parser.getTok().isNot(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '#'
+
+  if (Parser.getTok().isNot(AsmToken::Integer)
+      || Parser.getTok().getIntVal() != 0) {
+ Error(Parser.getTok().getLoc(), "expected '#0'");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '0'
+
+ SMLoc E = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
+ return MatchOperand_Success;
+}
+
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::ParseShiftExtend(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ std::string LowerID = IDVal.lower();
+
+ A64SE::ShiftExtSpecifiers Spec =
+ StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
+ .Case("lsl", A64SE::LSL)
+ .Case("lsr", A64SE::LSR)
+ .Case("asr", A64SE::ASR)
+ .Case("ror", A64SE::ROR)
+ .Case("uxtb", A64SE::UXTB)
+ .Case("uxth", A64SE::UXTH)
+ .Case("uxtw", A64SE::UXTW)
+ .Case("uxtx", A64SE::UXTX)
+ .Case("sxtb", A64SE::SXTB)
+ .Case("sxth", A64SE::SXTH)
+ .Case("sxtw", A64SE::SXTW)
+ .Case("sxtx", A64SE::SXTX)
+ .Default(A64SE::Invalid);
+
+ if (Spec == A64SE::Invalid)
+ return MatchOperand_NoMatch;
+
+ // Eat the shift
+ SMLoc S, E;
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Spec != A64SE::LSL && Spec != A64SE::LSR &&
+ Spec != A64SE::ASR && Spec != A64SE::ROR) {
+ // The shift amount can be omitted for the extending versions, but not real
+ // shifts:
+ // add x0, x0, x0, uxtb
+ // is valid, and equivalent to
+ // add x0, x0, x0, uxtb #0
+
+ if (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::EndOfStatement) ||
+ Parser.getTok().is(AsmToken::RBrac)) {
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true,
+ S, E));
+ return MatchOperand_Success;
+ }
+ }
+
+ // Eat # at beginning of immediate
+ if (!Parser.getTok().is(AsmToken::Hash)) {
+ Error(Parser.getTok().getLoc(),
+ "expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Amount = Parser.getTok().getIntVal();
+ Parser.Lex();
+ E = Parser.getTok().getLoc();
+
+ Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false,
+ S, E));
+
+ return MatchOperand_Success;
+}
+
+// FIXME: We would really like to be able to tablegen'erate this.
+bool AArch64AsmParser::
+validateInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case AArch64::BFIwwii:
+ case AArch64::BFIxxii:
+ case AArch64::SBFIZwwii:
+ case AArch64::SBFIZxxii:
+ case AArch64::UBFIZwwii:
+ case AArch64::UBFIZxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+
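+    // By this point lsb has become immr = (RegWidth - lsb) % RegWidth and width
+    // has become imms = width - 1, so lsb + width <= RegWidth is equivalent to
+    // ImmS < ImmR (or ImmR == 0, i.e. lsb == 0).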
+ if (ImmR != 0 && ImmS >= ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested insert overflows register");
+ }
+ return false;
+ }
+ case AArch64::BFXILwwii:
+ case AArch64::BFXILxxii:
+ case AArch64::SBFXwwii:
+ case AArch64::SBFXxxii:
+ case AArch64::UBFXwwii:
+ case AArch64::UBFXxxii: {
+ unsigned ImmOps = Inst.getNumOperands() - 2;
+ int64_t ImmR = Inst.getOperand(ImmOps).getImm();
+ int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
+ int64_t RegWidth = 0;
+ switch (Inst.getOpcode()) {
+ case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
+ RegWidth = 64;
+ break;
+ case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
+ RegWidth = 32;
+ break;
+ }
+
+ if (ImmS >= RegWidth || ImmS < ImmR) {
+ return Error(Operands[4]->getStartLoc(),
+ "requested extract overflows register");
+ }
+ return false;
+ }
+ case AArch64::ICix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (!A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::ICi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
+ if (A64IC::NeedsRegister(ICOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified IC op requires a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIix: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (!A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op does not use a register");
+ }
+ return false;
+ }
+ case AArch64::TLBIi: {
+ int64_t ImmVal = Inst.getOperand(0).getImm();
+ A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
+ if (A64TLBI::NeedsRegister(TLBIOp)) {
+ return Error(Operands[1]->getStartLoc(),
+ "specified TLBI op requires a register");
+ }
+ return false;
+ }
+ }
+
+ return false;
+}
+
+
+// Parses the instruction *together with* all operands, appending each parsed
+// operand to the "Operands" list
+bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ size_t CondCodePos = Name.find('.');
+
+ StringRef Mnemonic = Name.substr(0, CondCodePos);
+ Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
+
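+ // For example, "b.eq" has already been split into the mnemonic "b"; the "eq"
+ // suffix is parsed as a condition-code operand below.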
+ if (CondCodePos != StringRef::npos) {
+ // We have a condition code
+ SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
+ StringRef CondStr = Name.substr(CondCodePos + 1, StringRef::npos);
+ A64CC::CondCodes Code;
+
+ Code = A64StringToCondCode(CondStr);
+
+ if (Code == A64CC::Invalid) {
+ Error(S, "invalid condition code");
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
+
+ Operands.push_back(AArch64Operand::CreateToken(".", DotL));
+ SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
+ Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ }
+
+ // Now we parse the operands of this instruction
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+
+ // Parse and remember the operand.
+ if (ParseOperand(Operands, Mnemonic)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+
+ // After successfully parsing some operands there are two special cases to
+ // consider (i.e. notional operands not separated by commas). Both are due
+ // to memory specifiers:
+ // + An RBrac will end an address for load/store/prefetch
+ // + An '!' will indicate a pre-indexed operation.
+ //
+ // It's someone else's responsibility to make sure these tokens are sane
+ // in the given context!
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Parser.Lex();
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "expected comma before next operand");
+ }
+
+ // Eat the EndOfStatement
+ Parser.Lex();
+
+ return false;
+}
+
+bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".hword")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ else if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ else if (IDVal == ".xword")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ else if (IDVal == ".tlsdesccall")
+ return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
+
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+// ParseDirectiveTLSDescCall:
+// ::= .tlsdesccall symbol
+bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+
+ MCInst Inst;
+ Inst.setOpcode(AArch64::TLSDESCCALL);
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+
+ getParser().getStreamer().EmitInstruction(Inst);
+ return false;
+}
+
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+
+ if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ if (validateInstruction(Inst, Operands))
+ return true;
+
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+
+ case Match_AddSubRegExtendSmall:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegExtendLarge:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegShift32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
+ case Match_AddSubRegShift64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
+ case Match_AddSubSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register, symbol or integer in range [0, 4095]");
+ case Match_CVTFixedPos32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_CVTFixedPos64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_CondCode:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected AArch64 condition code");
+ case Match_FPImm:
+ // Any situation which allows a nontrivial floating-point constant also
+ // allows a register.
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or floating-point constant");
+ case Match_FPZero:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected floating-point constant #0.0");
+ case Match_Label:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected label or encodable integer pc offset");
+ case Match_Lane1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected lane specifier '[1]'");
+ case Match_LoadStoreExtend32_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0");
+ case Match_LoadStoreExtend32_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend32_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend32_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend32_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtw' with optional shift of #0 or #4");
+ case Match_LoadStoreExtend64_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0");
+ case Match_LoadStoreExtend64_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+ case Match_LoadStoreExtend64_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+ case Match_LoadStoreExtend64_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+ case Match_LoadStoreExtend64_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+ case Match_LoadStoreSImm7_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 4 in range [-256, 252]");
+ case Match_LoadStoreSImm7_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 8 in range [-512, 508]");
+ case Match_LoadStoreSImm7_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer multiple of 16 in range [-1024, 1016]");
+ case Match_LoadStoreSImm9:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [-256, 255]");
+ case Match_LoadStoreUImm12_1:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 4095]");
+ case Match_LoadStoreUImm12_2:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 8190]");
+ case Match_LoadStoreUImm12_4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 16380]");
+ case Match_LoadStoreUImm12_8:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 32760]");
+ case Match_LoadStoreUImm12_16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic reference or integer in range [0, 65520]");
+ case Match_LogicalSecondSource:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected compatible register or logical immediate");
+ case Match_MOVWUImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected relocated symbol or integer in range [0, 65535]");
+ case Match_MRS:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected readable system register");
+ case Match_MSR:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected writable system register or pstate");
+ case Match_NamedImm_at:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
+ case Match_NamedImm_dbarrier:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or symbolic barrier operand");
+ case Match_NamedImm_dc:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected symbolic 'dc' operand");
+ case Match_NamedImm_ic:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
+ case Match_NamedImm_isb:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15] or 'sy'");
+ case Match_NamedImm_prefetch:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
+ case Match_NamedImm_tlbi:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected translation buffer invalidation operand");
+ case Match_UImm16:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 65535]");
+ case Match_UImm3:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_UImm4:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_UImm5:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_UImm6:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
+ case Match_UImm7:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 127]");
+ case Match_Width32:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 31]");
+ case Match_Width64:
+ return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [<lsb>, 63]");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_CondCode:
+ OS << "<CondCode: " << CondCode.Code << ">";
+ break;
+ case k_FPImmediate:
+ OS << "<fpimm: " << FPImm.Val << ">";
+ break;
+ case k_ImmWithLSL:
+ OS << "<immwithlsl: imm=" << ImmWithLSL.Val
+ << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ break;
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << '>';
+ break;
+ case k_Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case k_ShiftExtend:
+ OS << "<shift: type=" << ShiftExtend.ShiftType
+ << ", amount=" << ShiftExtend.Amount << ">";
+ break;
+ case k_SysReg: {
+ StringRef Name(SysReg.Data, SysReg.Length);
+ OS << "<sysreg: " << Name << '>';
+ break;
+ }
+ default:
+ llvm_unreachable("No idea how to print this kind of operand");
+ break;
+ }
+}
+
+void AArch64Operand::dump() const {
+ print(errs());
+}
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..a018a0a
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmParser
+ AArch64AsmParser.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
new file mode 100644
index 0000000..bd1fcaf
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmParser
+parent = AArch64
+required_libraries = AArch64Desc AArch64Info MC MCParser Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile
new file mode 100644
index 0000000..56c9ef5
--- /dev/null
+++ b/lib/Target/AArch64/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/AsmParser/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmParser
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
new file mode 100644
index 0000000..8164d6f
--- /dev/null
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -0,0 +1,36 @@
+set(LLVM_TARGET_DEFINITIONS AArch64.td)
+
+tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(AArch64CommonTableGen)
+
+add_llvm_target(AArch64CodeGen
+ AArch64AsmPrinter.cpp
+ AArch64BranchFixupPass.cpp
+ AArch64FrameLowering.cpp
+ AArch64ISelDAGToDAG.cpp
+ AArch64ISelLowering.cpp
+ AArch64InstrInfo.cpp
+ AArch64MachineFunctionInfo.cpp
+ AArch64MCInstLower.cpp
+ AArch64RegisterInfo.cpp
+ AArch64SelectionDAGInfo.cpp
+ AArch64Subtarget.cpp
+ AArch64TargetMachine.cpp
+ AArch64TargetObjectFile.cpp
+ )
+
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
+add_subdirectory(Utils)
\ No newline at end of file
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
new file mode 100644
index 0000000..eba7666
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -0,0 +1,787 @@
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the functions necessary to decode AArch64 instruction
+// bitpatterns into MCInsts (with the help of TableGenerated information from
+// the instruction definitions).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64-disassembler"
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+/// AArch64 disassembler for all AArch64 platforms.
+class AArch64Disassembler : public MCDisassembler {
+ const MCRegisterInfo *RegInfo;
+public:
+ /// Initializes the disassembler.
+ ///
+ AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
+ : MCDisassembler(STI), RegInfo(Info) {
+ }
+
+ ~AArch64Disassembler() {
+ }
+
+ /// See MCDisassembler.
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const;
+
+ const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+};
+
+}
+
+// Forward-declarations used in the auto-generated files.
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder);
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus
+DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
+ llvm::MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+
+static bool Check(DecodeStatus &Out, DecodeStatus In);
+
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+static bool Check(DecodeStatus &Out, DecodeStatus In) {
+ switch (In) {
+ case MCDisassembler::Success:
+ // Out stays the same.
+ return true;
+ case MCDisassembler::SoftFail:
+ Out = In;
+ return true;
+ case MCDisassembler::Fail:
+ Out = In;
+ return false;
+ }
+ llvm_unreachable("Invalid DecodeStatus!");
+}
+
+DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ CommentStream = &cs;
+
+ uint8_t bytes[4];
+
+ // We want to read exactly 4 bytes of data.
+ if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // Encoded as a little-endian 32-bit word in the stream.
+ uint32_t insn = (bytes[3] << 24) |
+ (bytes[2] << 16) |
+ (bytes[1] << 8) |
+ (bytes[0] << 0);
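+ // For example, the byte sequence 1f 20 03 d5 yields the word 0xd503201f,
+ // which is the A64 NOP (HINT #0) encoding.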
+
+ // Calling the auto-generated decoder function.
+ DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ Size = 0;
+ return MCDisassembler::Fail;
+}
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
+ return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+}
+
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus
+DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus
+DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
+ unsigned OptionHiS,
+ uint64_t Address,
+ const void *Decoder) {
+ // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
+ // S}. Hence we want to check bit 1.
+ if (!(OptionHiS & 2))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(OptionHiS));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
+ // between 0 and 31.
+ if (Imm6Bits > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
+ unsigned Imm6Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
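+ // For example, a fixed-point position of #1 is encoded as 63 and #32 as 32;
+ // an encoding below 32 would mean a position greater than 32, which only the
+ // 64-bit variant can represent.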
+ if (Imm6Bits < 32)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
+ return MCDisassembler::Success;
+}
+
+
+template<int RegWidth>
+static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
+ unsigned FullImm,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Imm16 = FullImm & 0xffff;
+ unsigned Shift = FullImm >> 16;
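+ // FullImm packs the shift into its top bits: e.g. FullImm = 0x2abcd gives
+ // imm16 = 0xabcd with shift 2 (i.e. "lsl #32"), which is only legal for the
+ // 64-bit variants and so is rejected below when RegWidth is 32.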
+
+ if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Imm16));
+ Inst.addOperand(MCOperand::CreateImm(Shift));
+ return MCDisassembler::Success;
+}
+
+template<int RegWidth>
+static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
+ unsigned Bits,
+ uint64_t Address,
+ const void *Decoder) {
+ uint64_t Imm;
+ if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Bits));
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values 0-4 are valid for this 3-bit field
+ if (ShiftAmount > 4)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
+ unsigned ShiftAmount,
+ uint64_t Address,
+ const void *Decoder) {
+ // Only values below 32 are valid for a 32-bit register
+ if (ShiftAmount > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
+ unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
+ unsigned SF = fieldFromInstruction(Insn, 31, 1);
+
+ // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
+ // out assertions that it thinks should never be hit.
+ enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
+ Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
+
+ if (!SF) {
+ // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
+ if (ImmR > 31 || ImmS > 31)
+ return MCDisassembler::Fail;
+ }
+
+ if (SF) {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ // BFM MCInsts use Rd as a source too.
+ if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // ASR and LSR have more specific patterns so they won't get here:
+ assert(!(ImmS == 31 && !SF && Opc != BFM)
+ && "shift should have used auto decode");
+ assert(!(ImmS == 63 && SF && Opc != BFM)
+ && "shift should have used auto decode");
+
+ // Extension instructions similarly:
+ if (Opc == SBFM && ImmR == 0) {
+ assert((ImmS != 7 && ImmS != 15) && "extension got here");
+ assert((ImmS != 31 || SF == 0) && "extension got here");
+ } else if (Opc == UBFM && ImmR == 0) {
+ assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
+ }
+
+ if (Opc == UBFM) {
+ // It might be a LSL instruction, which actually takes the shift amount
+ // itself as an MCInst operand.
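+ // For example, the 64-bit encoding ImmR = 60, ImmS = 59 satisfies
+ // (ImmS + 1) % 64 == ImmR and is emitted as LSLxxi with amount 63 - 59 = 4.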
+ if (SF && (ImmS + 1) % 64 == ImmR) {
+ Inst.setOpcode(AArch64::LSLxxi);
+ Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
+ return MCDisassembler::Success;
+ } else if (!SF && (ImmS + 1) % 32 == ImmR) {
+ Inst.setOpcode(AArch64::LSLwwi);
+ Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
+ return MCDisassembler::Success;
+ }
+ }
+
+ // Otherwise it's definitely either an extract or an insert depending on which
+ // of ImmR or ImmS is larger.
+ unsigned ExtractOp, InsertOp;
+ switch (Opc) {
+ default: llvm_unreachable("unexpected instruction trying to decode bitfield");
+ case SBFM:
+ ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
+ InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
+ break;
+ case BFM:
+ ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
+ InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
+ break;
+ case UBFM:
+ ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
+ InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
+ break;
+ }
+
+ // Otherwise it's a boring insert or extract
+ Inst.addOperand(MCOperand::CreateImm(ImmR));
+ Inst.addOperand(MCOperand::CreateImm(ImmS));
+
+
+ if (ImmS < ImmR)
+ Inst.setOpcode(InsertOp);
+ else
+ Inst.setOpcode(ExtractOp);
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // This decoder exists to add the dummy Lane operand to the MCInst, which must
+ // be 1 in assembly but has no other real manifestation.
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
+
+ if (IsToVec) {
+ DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
+ } else {
+ DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // Add the lane
+ Inst.addOperand(MCOperand::CreateImm(1));
+
+ return MCDisassembler::Success;
+}
+
+
+static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus Result = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
+ unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
+ unsigned L = fieldFromInstruction(Insn, 22, 1);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Opc = fieldFromInstruction(Insn, 30, 2);
+
+ // Not an official name, but it turns out that bit 23 distinguishes indexed
+ // from non-indexed operations.
+ unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
+
+ if (Indexed && L == 0) {
+ // The MCInst for an indexed store has an out operand and 4 ins:
+ // Rn_wb, Rt, Rt2, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ // You shouldn't load to the same register twice in an instruction...
+ if (L && Rt == Rt2)
+ Result = MCDisassembler::SoftFail;
+
+ // ... or do any operation that writes back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ Result = MCDisassembler::SoftFail;
+
+ // Exactly how we decode the MCInst's registers depends on the Opc and V
+ // fields of the instruction. These also obviously determine the size of the
+ // operation so we can fill in that information while we're at it.
+ if (V) {
+ // The instruction operates on the FP/SIMD registers
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ } else {
+ switch (Opc) {
+ default: return MCDisassembler::Fail;
+ case 0:
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 1:
+ assert(L && "unexpected \"store signed\" attempt");
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ case 2:
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Indexed && L == 1) {
+ // The MCInst for an indexed load has 3 out operands and 2 ins:
+ // Rt, Rt2, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(SImm7));
+
+ return Result;
+}
+
+static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
+ uint32_t Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Val, 0, 5);
+ unsigned Rn = fieldFromInstruction(Val, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
+ unsigned MemSize = fieldFromInstruction(Val, 30, 2);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (Rt == Rt2) S = MCDisassembler::SoftFail;
+
+ switch (MemSize) {
+ case 2:
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case 3:
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
+ }
+
+ if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<typename SomeNamedImmMapper>
+static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ SomeNamedImmMapper Mapper;
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+ if (ValidNamed || Mapper.validImm(Val)) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ bool ValidNamed;
+ Mapper.toString(Val, ValidNamed);
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+
+ return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
+ Decoder);
+}
+
+static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
+
+ unsigned Opc = fieldFromInstruction(Insn, 22, 2);
+ unsigned V = fieldFromInstruction(Insn, 26, 1);
+ unsigned Size = fieldFromInstruction(Insn, 30, 2);
+
+ if (Opc == 0 || (V == 1 && Opc == 2)) {
+ // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ if (V == 0 && (Opc == 2 || Size == 3)) {
+ DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 0) {
+ DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
+ } else if (V == 1 && (Opc & 2)) {
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ } else {
+ switch (Size) {
+ case 0:
+ DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 1:
+ DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ if (Opc != 0 && (V != 1 || Opc != 2)) {
+ // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ }
+
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Imm9));
+
+ // N.b. The official documentation says unpredictable if Rt == Rn, but this
+ // takes place at the architectural rather than encoding level:
+ //
+ // "STR xzr, [sp], #4" is perfectly valid.
+ if (V == 0 && Rt == Rn && Rn != 31)
+ return MCDisassembler::SoftFail;
+ else
+ return MCDisassembler::Success;
+}
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new AArch64Disassembler(STI, T.createMCRegInfo(""));
+}
+
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64Target,
+ createAArch64Disassembler);
+}
+
+
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..d4bd163
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Disassembler
+ AArch64Disassembler.cpp
+ )
+
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
new file mode 100644
index 0000000..a93e343
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Disassembler
+parent = AArch64
+required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
new file mode 100644
index 0000000..5c86120
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/Disassembler/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Disassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
new file mode 100644
index 0000000..82ce80c
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -0,0 +1,408 @@
+//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter.inc"
+
+static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
+ assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
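+ // E.g. unpackSignedImm(9, 0x1ff) == -1 and unpackSignedImm(9, 0x0ff) == 255.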
+ if (Value & (1ULL << (BitWidth - 1)))
+ return static_cast<int64_t>(Value) - (1LL << BitWidth);
+ else
+ return Value;
+}
+
+AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void
+AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(9, MOImm.getImm());
+
+ O << '#' << Imm;
+}
+
+void
+AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize) {
+ unsigned ExtImm = MI->getOperand(OpNum).getImm();
+ unsigned OptionHi = ExtImm >> 1;
+ unsigned S = ExtImm & 1;
+ bool IsLSL = OptionHi == 1 && RmSize == 64;
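+ // For example, "ldr x0, [x1, w2, sxtw #3]" (MemSize 8, RmSize 32) has
+ // ExtImm = 0b111: OptionHi = 3 selects "sxtw" and S = 1 appends " #3" below.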
+
+ const char *Ext;
+ switch (OptionHi) {
+ case 1:
+ Ext = (RmSize == 32) ? "uxtw" : "lsl";
+ break;
+ case 3:
+ Ext = (RmSize == 32) ? "sxtw" : "sxtx";
+ break;
+ default:
+ llvm_unreachable("Incorrect Option on load/store (reg offset)");
+ }
+ O << Ext;
+
+ if (S) {
+ unsigned ShiftAmt = Log2_32(MemSize);
+ O << " #" << ShiftAmt;
+ } else if (IsLSL) {
+ O << " #0";
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O) {
+ const MCOperand &Imm12Op = MI->getOperand(OpNum);
+
+ if (Imm12Op.isImm()) {
+ int64_t Imm12 = Imm12Op.getImm();
+ assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
+ O << "#" << Imm12;
+ } else {
+ assert(Imm12Op.isExpr() && "Unexpected shift operand type");
+ O << "#" << *Imm12Op.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+
+ printAddSubImmLSL0Operand(MI, OpNum, O);
+
+ O << ", lsl #12";
+}
+
+void
+AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << MO.getImm();
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmROp = MI->getOperand(OpNum);
+ unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
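+ // BFI encodes the lsb as -lsb modulo the register width, so e.g. ImmR = 24 on
+ // a 32-bit operation corresponds to lsb 8 and prints as "#8".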
+
+ O << '#' << LSB;
+}
+
+void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ unsigned Width = ImmSOp.getImm() + 1;
+
+ O << '#' << Width;
+}
+
+void
+AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ImmSOp = MI->getOperand(OpNum);
+ const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+
+ unsigned ImmR = ImmROp.getImm();
+ unsigned ImmS = ImmSOp.getImm();
+
+ assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
+
+ O << '#' << (ImmS - ImmR + 1);
+}
+
+void
+AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &CRx = MI->getOperand(OpNum);
+
+ O << 'c' << CRx.getImm();
+}
+
+
+void
+AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &ScaleOp = MI->getOperand(OpNum);
+
+ O << '#' << (64 - ScaleOp.getImm());
+}
+
+
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ const MCOperand &MOImm8 = MI->getOperand(OpNum);
+
+ assert(MOImm8.isImm()
+ && "Immediate operand required for floating-point immediate inst");
+
+ uint32_t Imm8 = MOImm8.getImm();
+ uint32_t Fraction = Imm8 & 0xf;
+ uint32_t Exponent = (Imm8 >> 4) & 0x7;
+ uint32_t Negative = (Imm8 >> 7) & 0x1;
+
+ float Val = 1.0f + Fraction / 16.0f;
+
+ // That is:
+ // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
+ // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
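+ // So, for example, Imm8 = 0x70 (fraction 0, exponent 7, positive) prints as
+ // #1.00000000 and Imm8 = 0x00 prints as #2.00000000.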
+ if (Exponent & 0x4) {
+ Val /= 1 << (7 - Exponent);
+ } else {
+ Val *= 1 << (Exponent + 1);
+ }
+
+ Val = Negative ? -Val : Val;
+
+ o << '#' << format("%.8f", Val);
+}
+
+void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o) {
+ o << "#0.0";
+}
+
+void
+AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+}
+
+template <unsigned field_width, unsigned scale> void
+AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ if (!MO.isImm()) {
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
+ // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which
+ // is multiplied by 4 (because all A64 instructions are 32 bits wide).
+ uint64_t UImm = MO.getImm();
+ uint64_t Sign = UImm & (1LL << (field_width - 1));
+ int64_t SImm = scale * ((UImm & ~Sign) - Sign);
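+ // For example, with field_width = 19 and scale = 4, UImm = 0x7ffff (all bits
+ // set) sign-extends to -1 and is printed as "#-4".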
+
+ O << "#" << SImm;
+}
+
+template<unsigned RegWidth> void
+AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ uint64_t Val;
+ A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ O << "#0x";
+ O.write_hex(Val);
+}
+
+void
+AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, int MemSize) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+
+ if (MOImm.isImm()) {
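+ // The encoded immediate is the byte offset divided by the access size, so
+ // e.g. "ldr x0, [x1, #16]" stores 2 here and prints back as #16 when MemSize
+ // is 8.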
+ uint32_t Imm = MOImm.getImm() * MemSize;
+
+ O << "#" << Imm;
+ } else {
+ O << "#" << *MOImm.getExpr();
+ }
+}
+
+void
+AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Shift) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ // LSL #0 is not printed
+ if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
+ return;
+
+ switch (Shift) {
+ case A64SE::LSL: O << "lsl"; break;
+ case A64SE::LSR: O << "lsr"; break;
+ case A64SE::ASR: O << "asr"; break;
+ case A64SE::ROR: O << "ror"; break;
+ default: llvm_unreachable("Invalid shift specifier in logical instruction");
+ }
+
+ O << " #" << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &UImm16MO = MI->getOperand(OpNum);
+ const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+
+ if (UImm16MO.isImm()) {
+ O << '#' << UImm16MO.getImm();
+
+ if (ShiftMO.getImm() != 0)
+ O << ", lsl #" << (ShiftMO.getImm() * 16);
+
+ return;
+ }
+
+ O << "#" << *UImm16MO.getExpr();
+}
+
+void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ bool ValidName;
+ const MCOperand &MO = MI->getOperand(OpNum);
+ StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+
+ if (ValidName)
+ O << Name;
+ else
+ O << '#' << MO.getImm();
+}
+
+void
+AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+
+ bool ValidName;
+ std::string Name = Mapper.toString(MO.getImm(), ValidName);
+ if (ValidName) {
+ O << Name;
+ return;
+ }
+}
+
+
+void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O,
+ A64SE::ShiftExtSpecifiers Ext) {
+ // FIXME: In principle TableGen should be able to detect this itself far more
+ // easily. We will only accumulate more of these hacks.
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+
+ if (isStackReg(Reg0) || isStackReg(Reg1)) {
+ A64SE::ShiftExtSpecifiers LSLEquiv;
+
+ if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
+ LSLEquiv = A64SE::UXTX;
+ else
+ LSLEquiv = A64SE::UXTW;
+
+ if (Ext == LSLEquiv) {
+ O << "lsl #" << MI->getOperand(OpNum).getImm();
+ return;
+ }
+ }
+
+ switch (Ext) {
+ case A64SE::UXTB: O << "uxtb"; break;
+ case A64SE::UXTH: O << "uxth"; break;
+ case A64SE::UXTW: O << "uxtw"; break;
+ case A64SE::UXTX: O << "uxtx"; break;
+ case A64SE::SXTB: O << "sxtb"; break;
+ case A64SE::SXTH: O << "sxth"; break;
+ case A64SE::SXTW: O << "sxtw"; break;
+ case A64SE::SXTX: O << "sxtx"; break;
+ default: llvm_unreachable("Unexpected shift type for printing");
+ }
+
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.getImm() != 0)
+ O << " #" << MO.getImm();
+}
+
+template<int MemScale> void
+AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOImm = MI->getOperand(OpNum);
+ int32_t Imm = unpackSignedImm(7, MOImm.getImm());
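+ // E.g. the encoded value 0x7f unpacks to -1 and prints as "#-8" when
+ // MemScale is 8 (the 64-bit load/store pair case).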
+
+ O << "#" << (Imm * MemScale);
+}
+
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
+ }
+}
+
+
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ if (MI->getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a special assembler directive which applies an
+ // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
+ // form outside the normal TableGenerated scheme.
+ O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
+ } else if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
new file mode 100644
index 0000000..639fa86
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -0,0 +1,172 @@
+//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AArch64 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64INSTPRINTER_H
+#define LLVM_AARCH64INSTPRINTER_H
+
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class AArch64InstPrinter : public MCInstPrinter {
+public:
+ AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ // Autogenerated by tblgen
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printRegName(raw_ostream &O, unsigned RegNum) const;
+
+ template<unsigned MemSize, unsigned RmSize>
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ }
+
+
+ void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned MemSize,
+ unsigned RmSize);
+
+ void printAddSubImmLSL0Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+ void printAddSubImmLSL12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &O);
+
+ void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+
+ void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCRxOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
+
+ template<int MemScale>
+ void printOffsetUImm12Operand(const MCInst *MI,
+ unsigned OpNum, raw_ostream &o) {
+ printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+ }
+
+ void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &o, int MemScale);
+
+ template<unsigned field_width, unsigned scale>
+ void printLabelOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<unsigned RegWidth>
+ void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<typename SomeNamedImmMapper>
+ void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ }
+
+ void printNamedImmOperand(const NamedImmMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
+ const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printMRSOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ }
+
+ void printMSROperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ }
+
+ void printShiftOperand(const char *name, const MCInst *MI,
+ unsigned OpIdx, raw_ostream &O);
+
+ void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("lsr", MI, OpNum, O);
+ }
+ void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("asr", MI, OpNum, O);
+ }
+ void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand("ror", MI, OpNum, O);
+ }
+
+ template<A64SE::ShiftExtSpecifiers Shift>
+ void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printShiftOperand(MI, OpNum, O, Shift);
+ }
+
+ void printShiftOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+
+
+ void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ template<int MemSize> void
+ printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+
+ void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ template<A64SE::ShiftExtSpecifiers EXT>
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ printRegExtendOperand(MI, OpNum, O, EXT);
+ }
+
+ void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+
+ bool isStackReg(unsigned RegNo) {
+ return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ }
+
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..d4b980a
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64AsmPrinter
+ AArch64InstPrinter.cpp
+ )
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
+
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000..4836c7c
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64AsmPrinter
+parent = AArch64
+required_libraries = AArch64Utils MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
new file mode 100644
index 0000000..1c36a8d
--- /dev/null
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64AsmPrinter
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
new file mode 100644
index 0000000..3b296fd
--- /dev/null
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -0,0 +1,36 @@
+;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
+[component_0]
+type = TargetGroup
+name = AArch64
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+;has_jit = 1
+
+[component_1]
+type = Library
+name = AArch64CodeGen
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
new file mode 100644
index 0000000..a3373b1
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -0,0 +1,585 @@
+//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 implementation of the MCAsmBackend class,
+// which is principally concerned with relaxation of the various fixup kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ const MCSubtargetInfo* STI;
+public:
+ AArch64AsmBackend(const Target &T, const StringRef TT)
+ : MCAsmBackend(),
+ STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
+ {}
+
+
+ ~AArch64AsmBackend() {
+ delete STI;
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved);
+};
+} // end anonymous namespace
+
+void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
+ (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
+ IsResolved = false;
+}
+
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ ELFAArch64AsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : AArch64AsmBackend(T, TT), OSABI(_OSABI) { }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const;
+
+  unsigned getNumFixupKinds() const {
+ return AArch64::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// AArch64FixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_add_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst128_lo12", 0, 32, 0 },
+{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_uabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
+{ "fixup_a64_movw_uabs_g3", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g0", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g1", 0, 32, 0 },
+{ "fixup_a64_movw_sabs_g2", 0, 32, 0 },
+{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_movw_tprel_g2", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0", 0, 32, 0 },
+{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
+{ "fixup_a64_add_tprel_hi12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
+{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
+{ "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ };
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value.
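+    // (Little-endian: e.g. an illustrative Value of 0x12345678 with
+    // NumBytes == 4 ORs 0x78, 0x56, 0x34 and 0x12 into successive bytes.)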
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst&) const {
+ return false;
+ }
+
+ void relaxInstruction(const MCInst&, llvm::MCInst&) const {
+ llvm_unreachable("Cannot relax instructions");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAArch64ELFObjectWriter(OS, OSABI);
+ }
+};
+
+} // end anonymous namespace
+
+bool
+ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+  // Correct for now. With all instructions 32 bits wide, only very unusual
+  // low-level considerations could lead to selecting something that might
+  // fail to fit and so need relaxation.
+ return false;
+}
+
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+  // Can't emit NOPs when the requested size isn't a multiple of 32 bits.
+ if (Count % 4 != 0)
+ return false;
+
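+  // 0xd503201f is the A64 encoding of NOP (an alias of HINT #0).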
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xd503201f);
+
+ return true;
+}
+
+static unsigned ADRImmBits(unsigned Value) {
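+  // ADR/ADRP split their 21-bit immediate into immlo (bits 30:29) and immhi
+  // (bits 23:5); e.g. an illustrative Value of 0x2005 gives lo2 == 1 and
+  // hi19 == 0x801, combined as (0x801 << 5) | (1 << 29).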
+ unsigned lo2 = Value & 0x3;
+ unsigned hi19 = (Value & 0x1fffff) >> 2;
+
+ return (hi19 << 5) | (lo2 << 29);
+}
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_2:
+ assert((int64_t)Value >= -32768 &&
+           (int64_t)Value <= 65535 &&
+ "Out of range ABS16 fixup");
+ return Value;
+ case FK_Data_4:
+ assert((int64_t)Value >= -(1LL << 31) &&
+ (int64_t)Value <= (1LL << 32) - 1 &&
+ "Out of range ABS32 fixup");
+ return Value;
+ case FK_Data_8:
+ return Value;
+
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+    // R_AARCH64_TLSIE_LD_GOTTPREL_PREL19: Set a load-literal immediate to
+    // bits 1F FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
+ case AArch64::fixup_a64_ld_prel:
+ // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
+ // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
+ return (Value & 0x1ffffc) << 3;
+
+ case AArch64::fixup_a64_adr_prel:
+ // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
+ // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
+ return ADRImmBits(Value & 0x1fffff);
+
+ case AArch64::fixup_a64_adr_prel_page:
+ // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
+ // F000 of the result of the operation, checking that -2^32 <= result <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_add_dtprel_hi12:
+    // R_AARCH64_TLSLD_ADD_DTPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
+ case AArch64::fixup_a64_add_tprel_hi12:
+    // R_AARCH64_TLSLE_ADD_TPREL_HI12: Set an ADD immediate field to bits
+ // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
+ return (Value & 0xfff000) >> 2;
+
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_add_tprel_lo12:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t)Value >= 0 &&
+ (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
+ // FFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
+ // FFF of G(TLSDESC(S+A)), with no overflow check.
+ case AArch64::fixup_a64_add_lo12:
+ // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
+ // S+A, with no overflow check.
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: Set an LD/ST offset field to bits
+    // FFF of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst8_lo12:
+ // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
+ // of S+A, with no overflow check.
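+    // (The unsigned-offset field sits at bits 21:10 and is scaled by the
+    // access size, hence the progressively smaller shifts and more aligned
+    // masks in the wider LD/ST cases below.)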
+ return (Value & 0xfff) << 10;
+
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FFE of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FFE of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst16_lo12:
+ // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
+ // of S+A, with no overflow check.
+ return (Value & 0xffe) << 9;
+
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FFC of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FFC of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst32_lo12:
+ // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
+ // of S+A, with no overflow check.
+ return (Value & 0xffc) << 8;
+
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
+ // of DTPREL(S+A), check 0 <= X < 2^12.
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
+    // of TPREL(S+A), check 0 <= X < 2^12.
+ assert((int64_t) Value >= 0 &&
+ (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
+ // ... fallthrough to no-checking versions ...
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+    // R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FF8 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+    // R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: Set an LD/ST offset field to
+    // bits FF8 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_ldst64_lo12:
+ // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
+ // of S+A, with no overflow check.
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_ldst128_lo12:
+ // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
+ // of S+A, with no overflow check.
+ return (Value & 0xff0) << 6;
+
+ case AArch64::fixup_a64_movw_uabs_g0:
+ // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
+ // with a check that S+A < 2^16
+ assert(Value <= 0xffff && "Out of range move wide fixup");
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of DTPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
+ // FFFF of TPREL(S+A) with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
+ // S+A with no overflow check.
+ return (Value & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g1:
+ // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
+ // S+A with a check that S+A < 2^32
+ assert(Value <= 0xffffffffull && "Out of range move wide fixup");
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+    // R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: Set a MOVK immediate field
+ // to bits FFFF0000 of TPREL(S+A), with no overflow check.
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
+ // FFFF0000 of S+A with no overflow check.
+ return ((Value >> 16) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2:
+ // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 of S+A with a check that S+A < 2^48
+ assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+    // R_AARCH64_MOVW_UABS_G2_NC: Sets a MOVK immediate field to bits FFFF
+    // 0000 0000 of S+A with no overflow check.
+ return ((Value >> 32) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_uabs_g3:
+ // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
+ // 0000 0000 of S+A (no overflow check needed)
+ return ((Value >> 48) & 0xffff) << 5;
+
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
+ // to bits FFFF of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g0:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
+ // bits FFFF of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g0: {
+ // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
+ // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
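+    // Illustrative example: Value == -3 leaves the MOVN encoding with
+    // imm16 == 2 (~(-3) & 0xffff), while Value == 3 sets bit 30 to turn the
+    // emitted MOVN pattern into a MOVZ with imm16 == 3.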
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = (Value & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ // MCCodeEmitter should have encoded a MOVN, which is fine.
+ Value = (~Value & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
+ // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
+ case AArch64::fixup_a64_movw_tprel_g1:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
+ // bits FFFF0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g1: {
+ // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
+ // should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 16) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 16) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
+ // to bits FFFF 0000 0000 of DTPREL(S+A).
+ case AArch64::fixup_a64_movw_tprel_g2:
+ // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
+ // bits FFFF 0000 0000 of TPREL(S+A).
+ case AArch64::fixup_a64_movw_sabs_g2: {
+ // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
+ // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
+ // we should convert between MOVN and MOVZ to achieve our goals).
+ int64_t Signed = Value;
+ assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
+ && "Out of range move wide fixup");
+ if (Signed >= 0) {
+ Value = ((Value >> 32) & 0xffff) << 5;
+ // Bit 30 converts the MOVN encoding into a MOVZ
+ Value |= 1 << 30;
+ } else {
+ Value = ((~Value >> 32) & 0xffff) << 5;
+ }
+ return Value;
+ }
+
+ case AArch64::fixup_a64_tstbr:
+ // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
+ // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
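+    // (The byte offset is implicitly scaled down by 4 and the 14-bit
+    // immediate starts at bit 5, hence the combined shift of 5 - 2.)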
+ assert((int64_t)Value >= -(1LL << 15) &&
+ (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
+ return (Value & 0xfffc) << (5 - 2);
+
+ case AArch64::fixup_a64_condbr:
+ // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
+ // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
+ assert((int64_t)Value >= -(1LL << 20) &&
+ (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
+ return (Value & 0x1ffffc) << (5 - 2);
+
+ case AArch64::fixup_a64_uncondbr:
+    // R_AARCH64_JUMP26: handled identically to the call fixup below, though
+    // a linker may treat the two differently.
+ case AArch64::fixup_a64_call:
+ // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
+ // checking that -2^27 <= S+A-P < 2^27.
+ assert((int64_t)Value >= -(1LL << 27) &&
+ (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
+ return (Value & 0xffffffc) >> 2;
+
+ case AArch64::fixup_a64_adr_gottprel_page:
+ // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
+ // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
+ // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
+ case AArch64::fixup_a64_adr_prel_got_page:
+ // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
+ // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) <
+ // 2^32.
+ assert((int64_t)Value >= -(1LL << 32) &&
+ (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
+ return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
+
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
+ // of X, with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
+ // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
+ // G(S) with no overflow check. Check X & 7 == 0
+ assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
+ return (Value & 0xff8) << 7;
+
+ case AArch64::fixup_a64_tlsdesc_call:
+ // R_AARCH64_TLSDESC_CALL: For relaxation only.
+ return 0;
+ }
+}
+
+MCAsmBackend *
+llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+ Triple TheTriple(TT);
+
+ return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
new file mode 100644
index 0000000..4bcc65d
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -0,0 +1,292 @@
+//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file handles ELF-specific object emission, converting LLVM's internal
+// fixups into the appropriate relocations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AArch64ELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AArch64ELFObjectWriter();
+
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+private:
+};
+}
+
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true)
+{}
+
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
+{}
+
+unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_PREL16;
+ case AArch64::fixup_a64_ld_prel:
+ Type = ELF::R_AARCH64_LD_PREL_LO19;
+ break;
+ case AArch64::fixup_a64_adr_prel:
+ Type = ELF::R_AARCH64_ADR_PREL_LO21;
+ break;
+ case AArch64::fixup_a64_adr_prel_page:
+ Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ break;
+ case AArch64::fixup_a64_adr_prel_got_page:
+ Type = ELF::R_AARCH64_ADR_GOT_PAGE;
+ break;
+ case AArch64::fixup_a64_tstbr:
+ Type = ELF::R_AARCH64_TSTBR14;
+ break;
+ case AArch64::fixup_a64_condbr:
+ Type = ELF::R_AARCH64_CONDBR19;
+ break;
+ case AArch64::fixup_a64_uncondbr:
+ Type = ELF::R_AARCH64_JUMP26;
+ break;
+ case AArch64::fixup_a64_call:
+ Type = ELF::R_AARCH64_CALL26;
+ break;
+ case AArch64::fixup_a64_adr_gottprel_page:
+ Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ break;
+ case AArch64::fixup_a64_ld_gottprel_prel19:
+ Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ break;
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented fixup -> relocation");
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_2:
+ return ELF::R_AARCH64_ABS16;
+ case AArch64::fixup_a64_add_lo12:
+ Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ld64_got_lo12_nc:
+ Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_lo12:
+ Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_lo12:
+ Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_lo12:
+ Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_lo12:
+ Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst128_lo12:
+ Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g0_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g1_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g2_nc:
+ Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ break;
+ case AArch64::fixup_a64_movw_uabs_g3:
+ Type = ELF::R_AARCH64_MOVW_UABS_G3;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g0:
+ Type = ELF::R_AARCH64_MOVW_SABS_G0;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g1:
+ Type = ELF::R_AARCH64_MOVW_SABS_G1;
+ break;
+ case AArch64::fixup_a64_movw_sabs_g2:
+ Type = ELF::R_AARCH64_MOVW_SABS_G2;
+ break;
+
+ // TLS Local-dynamic block
+ case AArch64::fixup_a64_movw_dtprel_g2:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_dtprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_dtprel_hi12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ break;
+
+ // TLS initial-exec block
+ case AArch64::fixup_a64_movw_gottprel_g1:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_gottprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ break;
+
+ // TLS local-exec block
+ case AArch64::fixup_a64_movw_tprel_g2:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g1_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ break;
+ case AArch64::fixup_a64_movw_tprel_g0_nc:
+ Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ break;
+ case AArch64::fixup_a64_add_tprel_hi12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_add_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ break;
+ case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
+ Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ break;
+
+ // TLS general-dynamic block
+ case AArch64::fixup_a64_tlsdesc_adr_page:
+ Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ break;
+ case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
+ Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ break;
+ case AArch64::fixup_a64_tlsdesc_call:
+ Type = ELF::R_AARCH64_TLSDESC_CALL;
+ break;
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
new file mode 100644
index 0000000..b83577a
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -0,0 +1,160 @@
+//===- AArch64ELFStreamer.cpp - ELF Object Output for AArch64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits AArch64 ELF .o object files. It
+// differs from the generic ELF streamer in that it emits mapping symbols ($x
+// and $d) to delimit regions of data and code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
+/// the appropriate points in the object files. These symbols are defined in the
+/// AArch64 ELF ABI:
+/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
+///
+/// In brief: $x or $d should be emitted at the start of each contiguous region
+/// of A64 code or data in a section. In practice, this emission does not rely
+/// on explicit assembler directives but on inherent properties of the
+/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
+/// instruction).
+///
+/// As a result this system is orthogonal to the DataRegion infrastructure used
+/// by MachO. Beware!
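+///
+/// For example (illustrative), assembling
+///   add x0, x0, x0
+///   .byte 1
+/// emits "$x" before the instruction and "$d" before the data byte.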
+class AArch64ELFStreamer : public MCELFStreamer {
+public:
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, TAB, OS, Emitter),
+ MappingSymbolCounter(0), LastEMS(EMS_None) {
+ }
+
+ ~AArch64ELFStreamer() {}
+
+ virtual void ChangeSection(const MCSection *Section) {
+ // We have to keep track of the mapping symbol state of any sections we
+    // use. Each one should start off as EMS_None, which is what
+    // DenseMap::lookup returns for a section it hasn't seen yet.
+ LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastEMS = LastMappingSymbols.lookup(Section);
+
+ MCELFStreamer::ChangeSection(Section);
+ }
+
+ /// This function is the one used to emit instruction data into the ELF
+ /// streamer. We override it to add the appropriate mapping symbol if
+ /// necessary.
+ virtual void EmitInstruction(const MCInst& Inst) {
+ EmitA64MappingSymbol();
+ MCELFStreamer::EmitInstruction(Inst);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitBytes(Data, AddrSpace);
+ }
+
+ /// This is one of the functions used to emit data into an ELF section, so the
+ /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
+ /// if necessary.
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitDataMappingSymbol();
+ MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace);
+ }
+
+private:
+ enum ElfMappingSymbol {
+ EMS_None,
+ EMS_A64,
+ EMS_Data
+ };
+
+ void EmitDataMappingSymbol() {
+ if (LastEMS == EMS_Data) return;
+ EmitMappingSymbol("$d");
+ LastEMS = EMS_Data;
+ }
+
+ void EmitA64MappingSymbol() {
+ if (LastEMS == EMS_A64) return;
+ EmitMappingSymbol("$x");
+ LastEMS = EMS_A64;
+ }
+
+ void EmitMappingSymbol(StringRef Name) {
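+    // Each mapping symbol gets a unique suffix ("$x.0", "$d.1", ...) and is
+    // defined as equal to a fresh temporary label at the current position.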
+ MCSymbol *Start = getContext().CreateTempSymbol();
+ EmitLabel(Start);
+
+ MCSymbol *Symbol =
+ getContext().GetOrCreateSymbol(Name + "." +
+ Twine(MappingSymbolCounter++));
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ Symbol->setSection(*getCurrentSection());
+
+ const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+ Symbol->setVariableValue(Value);
+ }
+
+ int64_t MappingSymbolCounter;
+
+ DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
+ ElfMappingSymbol LastEMS;
+
+ /// @}
+};
+}
+
+namespace llvm {
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+ }
+}
+
+
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
new file mode 100644
index 0000000..5a89ca5
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -0,0 +1,27 @@
+//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_ELF_STREAMER_H
+#define LLVM_AARCH64_ELF_STREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+
+ MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif // LLVM_AARCH64_ELF_STREAMER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
new file mode 100644
index 0000000..eeb122d
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -0,0 +1,113 @@
+//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LLVM fixups applied to MCInsts in the AArch64
+// backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+ namespace AArch64 {
+ enum Fixups {
+ fixup_a64_ld_prel = FirstTargetFixupKind,
+ fixup_a64_adr_prel,
+ fixup_a64_adr_prel_page,
+
+ fixup_a64_add_lo12,
+
+ fixup_a64_ldst8_lo12,
+ fixup_a64_ldst16_lo12,
+ fixup_a64_ldst32_lo12,
+ fixup_a64_ldst64_lo12,
+ fixup_a64_ldst128_lo12,
+
+ fixup_a64_tstbr,
+ fixup_a64_condbr,
+ fixup_a64_uncondbr,
+ fixup_a64_call,
+
+ fixup_a64_movw_uabs_g0,
+ fixup_a64_movw_uabs_g0_nc,
+ fixup_a64_movw_uabs_g1,
+ fixup_a64_movw_uabs_g1_nc,
+ fixup_a64_movw_uabs_g2,
+ fixup_a64_movw_uabs_g2_nc,
+ fixup_a64_movw_uabs_g3,
+
+ fixup_a64_movw_sabs_g0,
+ fixup_a64_movw_sabs_g1,
+ fixup_a64_movw_sabs_g2,
+
+ fixup_a64_adr_prel_got_page,
+ fixup_a64_ld64_got_lo12_nc,
+
+ // Produce offsets relative to the module's dynamic TLS area.
+ fixup_a64_movw_dtprel_g2,
+ fixup_a64_movw_dtprel_g1,
+ fixup_a64_movw_dtprel_g1_nc,
+ fixup_a64_movw_dtprel_g0,
+ fixup_a64_movw_dtprel_g0_nc,
+ fixup_a64_add_dtprel_hi12,
+ fixup_a64_add_dtprel_lo12,
+ fixup_a64_add_dtprel_lo12_nc,
+ fixup_a64_ldst8_dtprel_lo12,
+ fixup_a64_ldst8_dtprel_lo12_nc,
+ fixup_a64_ldst16_dtprel_lo12,
+ fixup_a64_ldst16_dtprel_lo12_nc,
+ fixup_a64_ldst32_dtprel_lo12,
+ fixup_a64_ldst32_dtprel_lo12_nc,
+ fixup_a64_ldst64_dtprel_lo12,
+ fixup_a64_ldst64_dtprel_lo12_nc,
+
+ // Produce the GOT entry containing a variable's address in TLS's
+ // initial-exec mode.
+ fixup_a64_movw_gottprel_g1,
+ fixup_a64_movw_gottprel_g0_nc,
+ fixup_a64_adr_gottprel_page,
+ fixup_a64_ld64_gottprel_lo12_nc,
+ fixup_a64_ld_gottprel_prel19,
+
+ // Produce offsets relative to the thread pointer: TPIDR_EL0.
+ fixup_a64_movw_tprel_g2,
+ fixup_a64_movw_tprel_g1,
+ fixup_a64_movw_tprel_g1_nc,
+ fixup_a64_movw_tprel_g0,
+ fixup_a64_movw_tprel_g0_nc,
+ fixup_a64_add_tprel_hi12,
+ fixup_a64_add_tprel_lo12,
+ fixup_a64_add_tprel_lo12_nc,
+ fixup_a64_ldst8_tprel_lo12,
+ fixup_a64_ldst8_tprel_lo12_nc,
+ fixup_a64_ldst16_tprel_lo12,
+ fixup_a64_ldst16_tprel_lo12_nc,
+ fixup_a64_ldst32_tprel_lo12,
+ fixup_a64_ldst32_tprel_lo12_nc,
+ fixup_a64_ldst64_tprel_lo12,
+ fixup_a64_ldst64_tprel_lo12_nc,
+
+ // Produce the special fixups used by the general-dynamic TLS model.
+ fixup_a64_tlsdesc_adr_page,
+ fixup_a64_tlsdesc_ld64_lo12_nc,
+ fixup_a64_tlsdesc_add_lo12_nc,
+ fixup_a64_tlsdesc_call,
+
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
new file mode 100644
index 0000000..8ec8cbf
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -0,0 +1,41 @@
+//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AArch64MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCAsmInfo.h"
+
+using namespace llvm;
+
+AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
+ PointerSize = 8;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "//";
+ PrivateGlobalPrefix = ".L";
+ Code32Directive = ".code\t32";
+
+ Data16bitsDirective = "\t.hword\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = "\t.xword\t";
+
+ UseDataRegionDirectives = true;
+
+ WeakRefDirective = "\t.weak\t";
+
+ HasLEB128 = true;
+ SupportsDebugInformation = true;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
new file mode 100644
index 0000000..a20bc47
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -0,0 +1,27 @@
+//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AArch64MCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64TARGETASMINFO_H
+#define LLVM_AARCH64TARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+ struct AArch64ELFMCAsmInfo : public MCAsmInfo {
+ explicit AArch64ELFMCAsmInfo();
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
new file mode 100644
index 0000000..756e037
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -0,0 +1,513 @@
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64MCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class AArch64MCCodeEmitter : public MCCodeEmitter {
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ MCContext &Ctx;
+
+public:
+ AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+
+ ~AArch64MCCodeEmitter() {}
+
+ unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ template<int MemSize>
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getOffsetUImm12OpValue(MI, OpIdx, Fixups, MemSize);
+ }
+
+ unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const;
+
+ unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // Labels are handled mostly the same way: a symbol is needed, and
+ // just gets some fixup attached.
+ template<AArch64::Fixups fixupDesired>
+ unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ unsigned getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != 4; ++i) {
+ EmitByte(Val & 0xff, OS);
+ Val >>= 8;
+ }
+ }
+
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
+
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue) const;
+
+ unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue) const;
+
+
+};
+
+} // end anonymous namespace
+
+unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (!MO.isExpr()) {
+ // This can occur for manually decoded or constructed MCInsts, but neither
+ // the assembly-parser nor instruction selection will currently produce an
+ // MCInst that's not a symbol reference.
+ assert(MO.isImm() && "Unexpected address requested");
+ return MO.getImm();
+ }
+
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ return 0;
+}
+
+unsigned AArch64MCCodeEmitter::
+getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int MemSize) const {
+ const MCOperand &ImmOp = MI.getOperand(OpIdx);
+ if (ImmOp.isImm())
+ return ImmOp.getImm();
+
+ assert(ImmOp.isExpr() && "Unexpected operand type");
+ const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
+ unsigned FixupKind;
+
+
+ switch (Expr->getKind()) {
+ default: llvm_unreachable("Unexpected operand modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
+ AArch64::fixup_a64_ldst16_lo12,
+ AArch64::fixup_a64_ldst32_lo12,
+ AArch64::fixup_a64_ldst64_lo12,
+ AArch64::fixup_a64_ldst128_lo12 };
+ assert(MemSize <= 16 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOT_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
+ break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12,
+ AArch64::fixup_a64_ldst16_dtprel_lo12,
+ AArch64::fixup_a64_ldst32_dtprel_lo12,
+ AArch64::fixup_a64_ldst64_dtprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_dtprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
+ break;
+  case AArch64MCExpr::VK_AARCH64_TPREL_LO12: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12,
+ AArch64::fixup_a64_ldst16_tprel_lo12,
+ AArch64::fixup_a64_ldst32_tprel_lo12,
+ AArch64::fixup_a64_ldst64_tprel_lo12 };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
+ unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst16_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst32_tprel_lo12_nc,
+ AArch64::fixup_a64_ldst64_tprel_lo12_nc };
+ assert(MemSize <= 8 && "Invalid fixup for operation");
+ FixupKind = FixupsBySize[Log2_32(MemSize)];
+ break;
+ }
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ assert(MemSize == 8 && "Invalid fixup for operation");
+ FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
+ break;
+ }
+
+ return getAddressWithFixup(ImmOp, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned FixupKind = 0;
+ switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
+ default: llvm_unreachable("Invalid expression modifier");
+ case AArch64MCExpr::VK_AARCH64_LO12:
+ FixupKind = AArch64::fixup_a64_add_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
+ FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
+ FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
+ FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
+ if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
+ Modifier = Expr->getKind();
+
+ unsigned FixupKind = 0;
+ switch(Modifier) {
+ case AArch64MCExpr::VK_AARCH64_None:
+ FixupKind = AArch64::fixup_a64_adr_prel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOT:
+ FixupKind = AArch64::fixup_a64_adr_prel_got_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL:
+ FixupKind = AArch64::fixup_a64_adr_gottprel_page;
+ break;
+ case AArch64MCExpr::VK_AARCH64_TLSDESC:
+ FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
+ break;
+ default:
+ llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
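+  // "lsl #imm" is an alias for "ubfm Rd, Rn, #((32 - imm) % 32), #(31 - imm)":
+  // pack immr into bits 5:0 and imms into bits 11:6 of the combined operand.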
+ return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
+}
+
+unsigned
+AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Only immediate expected for shift");
+
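+  // 64-bit analogue of the above: immr = (64 - imm) % 64, imms = 63 - imm.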
+ return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+}
+
+
+template<AArch64::Fixups fixupDesired> unsigned
+AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isExpr())
+ return getAddressWithFixup(MO, fixupDesired, Fixups);
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned
+AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
+ unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ if (MO.isImm())
+ return MO.getImm();
+
+ assert(MO.isExpr());
+
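+  // A :gottprel: modifier means the literal load fetches a variable's TPREL
+  // offset from the GOT and needs its own fixup; any other label is a plain
+  // 19-bit PC-relative literal load.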
+ unsigned FixupKind;
+ if (isa<AArch64MCExpr>(MO.getExpr())) {
+    assert(cast<AArch64MCExpr>(MO.getExpr())->getKind()
+ == AArch64MCExpr::VK_AARCH64_GOTTPREL
+ && "Invalid symbol modifier for literal load");
+ FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
+ } else {
+ FixupKind = AArch64::fixup_a64_ld_prel;
+ }
+
+ return getAddressWithFixup(MO, FixupKind, Fixups);
+}
+
+
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+unsigned
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &UImm16MO = MI.getOperand(OpIdx);
+ const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+
+ unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
+
+ if (UImm16MO.isImm()) {
+ Result |= UImm16MO.getImm();
+ return Result;
+ }
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ AArch64::Fixups requestedFixup;
+ switch (A64E->getKind()) {
+ default: llvm_unreachable("unexpected expression modifier");
+ case AArch64MCExpr::VK_AARCH64_ABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
+ case AArch64MCExpr::VK_AARCH64_ABS_G3:
+ requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
+ requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
+ }
+
+ return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
+}
+
+unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
+ unsigned EncodedValue) const {
+  // For FCMP[E] Rn, #0.0 the Rm field is architecturally ignored, but its
+  // canonical representation is all zeroes, so clear those bits here.
+ EncodedValue &= ~0x1f0000u;
+
+ return EncodedValue;
+}
+
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue) const {
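+  // Unused Rs (bits 20:16) and Rt2 (bits 14:10) fields of the load/store
+  // exclusive instructions should be assembled as all ones.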
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
+
+ return EncodedValue;
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue) const {
+ // If one of the signed fixup kinds is applied to a MOVZ instruction, the
+ // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
+ // job to ensure that any bits possibly affected by this are 0. This means we
+ // must zero out bit 30 (essentially emitting a MOVN).
+ MCOperand UImm16MO = MI.getOperand(1);
+
+ // Nothing to do if there's no fixup.
+ if (UImm16MO.isImm())
+ return EncodedValue;
+
+ const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
+ switch (A64E->getKind()) {
+ case AArch64MCExpr::VK_AARCH64_SABS_G0:
+ case AArch64MCExpr::VK_AARCH64_SABS_G1:
+ case AArch64MCExpr::VK_AARCH64_SABS_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
+ case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G2:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G1:
+ case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ return EncodedValue & ~(1u << 30);
+ default:
+ // Nothing to do for an unsigned fixup.
+ return EncodedValue;
+ }
+
+ llvm_unreachable("Should have returned by now");
+}
+
+unsigned
+AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
+ unsigned EncodedValue) const {
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ EncodedValue |= 0x1f << 10;
+ return EncodedValue;
+}
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(Ctx);
+}
+
+void AArch64MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
+ const MCExpr *Expr;
+ Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
+ Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
+ return;
+ }
+
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ EmitInstruction(Binary, OS);
+}
+
+
+#include "AArch64GenMCCodeEmitter.inc"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
new file mode 100644
index 0000000..c1abfe7
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -0,0 +1,178 @@
+//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the assembly expression modifiers
+// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "aarch64mcexpr"
+#include "AArch64MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/Object/ELF.h"
+
+using namespace llvm;
+
+const AArch64MCExpr*
+AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Kind, Expr);
+}
+
+void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_AARCH64_GOT: OS << ":got:"; break;
+ case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
+ case VK_AARCH64_LO12: OS << ":lo12:"; break;
+ case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
+ case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
+ case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
+ case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
+ case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
+ case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
+ case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
+ case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
+ case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
+ case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
+ case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
+ case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
+ case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
+ case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
+ case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
+ case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
+ case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
+ case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
+ case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
+ case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
+ case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
+ case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
+ case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
+ case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
+ case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
+ case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
+ case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
+ case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
+ case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
+ case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
+ case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
+ case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+  }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return getSubExpr()->EvaluateAsRelocatable(Res, *Layout);
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
+ MCELF::SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
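+  // Only expressions referring to thread-local storage need their symbols
+  // retyped to STT_TLS; every other modifier is left alone.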
+ switch (getKind()) {
+ default:
+ return;
+ case VK_AARCH64_DTPREL_G2:
+ case VK_AARCH64_DTPREL_G1:
+ case VK_AARCH64_DTPREL_G1_NC:
+ case VK_AARCH64_DTPREL_G0:
+ case VK_AARCH64_DTPREL_G0_NC:
+ case VK_AARCH64_DTPREL_HI12:
+ case VK_AARCH64_DTPREL_LO12:
+ case VK_AARCH64_DTPREL_LO12_NC:
+ case VK_AARCH64_GOTTPREL_G1:
+ case VK_AARCH64_GOTTPREL_G0_NC:
+ case VK_AARCH64_GOTTPREL:
+ case VK_AARCH64_GOTTPREL_LO12:
+ case VK_AARCH64_TPREL_G2:
+ case VK_AARCH64_TPREL_G1:
+ case VK_AARCH64_TPREL_G1_NC:
+ case VK_AARCH64_TPREL_G0:
+ case VK_AARCH64_TPREL_G0_NC:
+ case VK_AARCH64_TPREL_HI12:
+ case VK_AARCH64_TPREL_LO12:
+ case VK_AARCH64_TPREL_LO12_NC:
+ case VK_AARCH64_TLSDESC:
+ case VK_AARCH64_TLSDESC_LO12:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+// FIXME: really do the above, now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
new file mode 100644
index 0000000..c0e3b29
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -0,0 +1,167 @@
+//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes AArch64-specific MCExprs, used for modifiers like
+// ":lo12:" or ":gottprel_g1:".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCEXPR_H
+#define LLVM_AARCH64MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class AArch64MCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_AARCH64_None,
+ VK_AARCH64_GOT, // :got: modifier in assembly
+ VK_AARCH64_GOT_LO12, // :got_lo12:
+ VK_AARCH64_LO12, // :lo12:
+
+ VK_AARCH64_ABS_G0, // :abs_g0:
+ VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
+ VK_AARCH64_ABS_G1,
+ VK_AARCH64_ABS_G1_NC,
+ VK_AARCH64_ABS_G2,
+ VK_AARCH64_ABS_G2_NC,
+ VK_AARCH64_ABS_G3,
+
+ VK_AARCH64_SABS_G0, // :abs_g0_s:
+ VK_AARCH64_SABS_G1,
+ VK_AARCH64_SABS_G2,
+
+ VK_AARCH64_DTPREL_G2, // :dtprel_g2:
+ VK_AARCH64_DTPREL_G1,
+ VK_AARCH64_DTPREL_G1_NC,
+ VK_AARCH64_DTPREL_G0,
+ VK_AARCH64_DTPREL_G0_NC,
+ VK_AARCH64_DTPREL_HI12,
+ VK_AARCH64_DTPREL_LO12,
+ VK_AARCH64_DTPREL_LO12_NC,
+
+    VK_AARCH64_GOTTPREL_G1, // :gottprel_g1:
+ VK_AARCH64_GOTTPREL_G0_NC,
+ VK_AARCH64_GOTTPREL,
+ VK_AARCH64_GOTTPREL_LO12,
+
+    VK_AARCH64_TPREL_G2, // :tprel_g2:
+ VK_AARCH64_TPREL_G1,
+ VK_AARCH64_TPREL_G1_NC,
+ VK_AARCH64_TPREL_G0,
+ VK_AARCH64_TPREL_G0_NC,
+ VK_AARCH64_TPREL_HI12,
+ VK_AARCH64_TPREL_LO12,
+ VK_AARCH64_TPREL_LO12_NC,
+
+ VK_AARCH64_TLSDESC, // :tlsdesc:
+ VK_AARCH64_TLSDESC_LO12
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const AArch64MCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
new file mode 100644
index 0000000..7960db0
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -0,0 +1,194 @@
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCTargetDesc.h"
+#include "AArch64ELFStreamer.h"
+#include "AArch64MCAsmInfo.h"
+#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
+ MCRegisterInfo *X = new MCRegisterInfo();
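+  // X30 is the link register, so report it as the return-address register.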
+ InitAArch64MCRegisterInfo(X, AArch64::X30);
+ return X;
+}
+
+static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
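+  // Initial CFI state: on function entry the canonical frame address is XSP
+  // with a zero offset.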
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(AArch64::XSP, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
+ // On ELF platforms the default static relocation model has a smart enough
+ // linker to cope with referencing external symbols defined in a shared
+ // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ RM = Reloc::Static;
+ }
+
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+
+ return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+
+static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return 0;
+}
+
+namespace {
+
+class AArch64MCInstrAnalysis : public MCInstrAnalysis {
+public:
+ AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
+
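+  // A conditional branch using the AL (always) condition is taken
+  // unconditionally, so special-case it in both queries below.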
+ virtual bool isUnconditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return true;
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ virtual bool isConditionalBranch(const MCInst &Inst) const {
+ if (Inst.getOpcode() == AArch64::Bcc
+ && Inst.getOperand(0).getImm() == A64CC::AL)
+ return false;
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size) const {
+ unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
+ // FIXME: We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
+ != MCOI::OPERAND_PCREL)
+ return -1ULL;
+
+ int64_t Imm = Inst.getOperand(LblOperand).getImm();
+
+ return Addr + Imm;
+ }
+};
+
+}
+
+static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AArch64MCInstrAnalysis(Info);
+}
+
+
+
+extern "C" void LLVMInitializeAArch64TargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn A(TheAArch64Target, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheAArch64Target,
+ createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheAArch64Target,
+ createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheAArch64Target,
+ createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ using AArch64_MC::createAArch64MCSubtargetInfo;
+ TargetRegistry::RegisterMCSubtargetInfo(TheAArch64Target,
+ createAArch64MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(TheAArch64Target,
+ createAArch64MCInstrAnalysis);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(TheAArch64Target,
+ createAArch64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64Target,
+ createAArch64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheAArch64Target,
+ createMCStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheAArch64Target,
+ createAArch64MCInstPrinter);
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
new file mode 100644
index 0000000..3849fe3
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -0,0 +1,65 @@
+//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AArch64 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64MCTARGETDESC_H
+#define LLVM_AARCH64MCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheAArch64Target;
+
+namespace AArch64_MC {
+ MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
+MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+} // End llvm namespace
+
+// Defines symbolic names for AArch64 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AArch64GenRegisterInfo.inc"
+
+// Defines symbolic names for the AArch64 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AArch64GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AArch64GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..44c66a2
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMAArch64Desc
+ AArch64AsmBackend.cpp
+ AArch64ELFObjectWriter.cpp
+ AArch64ELFStreamer.cpp
+ AArch64MCAsmInfo.cpp
+ AArch64MCCodeEmitter.cpp
+ AArch64MCExpr.cpp
+ AArch64MCTargetDesc.cpp
+ )
+add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..37c8035
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Desc
+parent = AArch64
+required_libraries = AArch64AsmPrinter AArch64Info MC Support
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/MCTargetDesc/Makefile b/lib/Target/AArch64/MCTargetDesc/Makefile
new file mode 100644
index 0000000..5779ac5
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AArch64/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile
new file mode 100644
index 0000000..641bb83
--- /dev/null
+++ b/lib/Target/AArch64/Makefile
@@ -0,0 +1,30 @@
+##===- lib/Target/AArch64/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAArch64CodeGen
+TARGET = AArch64
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AArch64GenAsmMatcher.inc \
+ AArch64GenAsmWriter.inc \
+ AArch64GenCallingConv.inc \
+ AArch64GenDAGISel.inc \
+ AArch64GenDisassemblerTables.inc \
+ AArch64GenInstrInfo.inc \
+ AArch64GenMCCodeEmitter.inc \
+ AArch64GenMCPseudoLowering.inc \
+ AArch64GenRegisterInfo.inc \
+ AArch64GenSubtargetInfo.inc
+
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
+
+include $(LEVEL)/Makefile.common
+
+
diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt
new file mode 100644
index 0000000..601990f
--- /dev/null
+++ b/lib/Target/AArch64/README.txt
@@ -0,0 +1,2 @@
+This file will contain changes that need to be made before AArch64 can become an
+officially supported target. Currently a placeholder.
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
new file mode 100644
index 0000000..b8099cb
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the key registration step for the architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheAArch64Target;
+
+extern "C" void LLVMInitializeAArch64TargetInfo() {
+ RegisterTarget<Triple::aarch64>
+ X(TheAArch64Target, "aarch64", "AArch64");
+}
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..e236eed
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Info
+ AArch64TargetInfo.cpp
+ )
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..5b003f0
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Info
+parent = AArch64
+required_libraries = MC Support Target
+add_to_library_groups = AArch64
+
diff --git a/lib/Target/AArch64/TargetInfo/Makefile b/lib/Target/AArch64/TargetInfo/Makefile
new file mode 100644
index 0000000..9dc9aa4
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
new file mode 100644
index 0000000..ab9bba1
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -0,0 +1,819 @@
+//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides basic encoding and assembly information for AArch64.
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Regex.h"
+
+using namespace llvm;
+
+StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Value == Value) {
+ Valid = true;
+ return Pairs[i].Name;
+ }
+ }
+
+ Valid = false;
+ return StringRef();
+}
+
+uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+ std::string LowerCaseName = Name.lower();
+ for (unsigned i = 0; i < NumPairs; ++i) {
+ if (Pairs[i].Name == LowerCaseName) {
+ Valid = true;
+ return Pairs[i].Value;
+ }
+ }
+
+ Valid = false;
+ return -1;
+}
+
+bool NamedImmMapper::validImm(uint32_t Value) const {
+ return Value < TooBigImm;
+}
+
+const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+ {"s1e1r", S1E1R},
+ {"s1e2r", S1E2R},
+ {"s1e3r", S1E3R},
+ {"s1e1w", S1E1W},
+ {"s1e2w", S1E2W},
+ {"s1e3w", S1E3W},
+ {"s1e0r", S1E0R},
+ {"s1e0w", S1E0W},
+ {"s12e1r", S12E1R},
+ {"s12e1w", S12E1W},
+ {"s12e0r", S12E0R},
+ {"s12e0w", S12E0W},
+};
+
+A64AT::ATMapper::ATMapper()
+ : NamedImmMapper(ATPairs, 0) {}
+
+const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+ {"oshld", OSHLD},
+ {"oshst", OSHST},
+ {"osh", OSH},
+ {"nshld", NSHLD},
+ {"nshst", NSHST},
+ {"nsh", NSH},
+ {"ishld", ISHLD},
+ {"ishst", ISHST},
+ {"ish", ISH},
+ {"ld", LD},
+ {"st", ST},
+ {"sy", SY}
+};
+
+A64DB::DBarrierMapper::DBarrierMapper()
+ : NamedImmMapper(DBarrierPairs, 16u) {}
+
+const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+ {"zva", ZVA},
+ {"ivac", IVAC},
+ {"isw", ISW},
+ {"cvac", CVAC},
+ {"csw", CSW},
+ {"cvau", CVAU},
+ {"civac", CIVAC},
+ {"cisw", CISW}
+};
+
+A64DC::DCMapper::DCMapper()
+ : NamedImmMapper(DCPairs, 0) {}
+
+const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+ {"ialluis", IALLUIS},
+ {"iallu", IALLU},
+ {"ivau", IVAU}
+};
+
+A64IC::ICMapper::ICMapper()
+ : NamedImmMapper(ICPairs, 0) {}
+
+const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+ {"sy", SY},
+};
+
+A64ISB::ISBMapper::ISBMapper()
+ : NamedImmMapper(ISBPairs, 16) {}
+
+const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+ {"pldl1keep", PLDL1KEEP},
+ {"pldl1strm", PLDL1STRM},
+ {"pldl2keep", PLDL2KEEP},
+ {"pldl2strm", PLDL2STRM},
+ {"pldl3keep", PLDL3KEEP},
+ {"pldl3strm", PLDL3STRM},
+ {"plil1keep", PLIL1KEEP},
+ {"plil1strm", PLIL1STRM},
+ {"plil2keep", PLIL2KEEP},
+ {"plil2strm", PLIL2STRM},
+ {"plil3keep", PLIL3KEEP},
+ {"plil3strm", PLIL3STRM},
+ {"pstl1keep", PSTL1KEEP},
+ {"pstl1strm", PSTL1STRM},
+ {"pstl2keep", PSTL2KEEP},
+ {"pstl2strm", PSTL2STRM},
+ {"pstl3keep", PSTL3KEEP},
+ {"pstl3strm", PSTL3STRM}
+};
+
+A64PRFM::PRFMMapper::PRFMMapper()
+ : NamedImmMapper(PRFMPairs, 32) {}
+
+const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+ {"spsel", SPSel},
+ {"daifset", DAIFSet},
+ {"daifclr", DAIFClr}
+};
+
+A64PState::PStateMapper::PStateMapper()
+ : NamedImmMapper(PStatePairs, 0) {}
+
+const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+ {"mdccsr_el0", MDCCSR_EL0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0},
+ {"mdrar_el1", MDRAR_EL1},
+ {"oslsr_el1", OSLSR_EL1},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
+ {"pmceid0_el0", PMCEID0_EL0},
+ {"pmceid1_el0", PMCEID1_EL0},
+ {"midr_el1", MIDR_EL1},
+ {"ccsidr_el1", CCSIDR_EL1},
+ {"clidr_el1", CLIDR_EL1},
+ {"ctr_el0", CTR_EL0},
+ {"mpidr_el1", MPIDR_EL1},
+ {"revidr_el1", REVIDR_EL1},
+ {"aidr_el1", AIDR_EL1},
+ {"dczid_el0", DCZID_EL0},
+ {"id_pfr0_el1", ID_PFR0_EL1},
+ {"id_pfr1_el1", ID_PFR1_EL1},
+ {"id_dfr0_el1", ID_DFR0_EL1},
+ {"id_afr0_el1", ID_AFR0_EL1},
+ {"id_mmfr0_el1", ID_MMFR0_EL1},
+ {"id_mmfr1_el1", ID_MMFR1_EL1},
+ {"id_mmfr2_el1", ID_MMFR2_EL1},
+ {"id_mmfr3_el1", ID_MMFR3_EL1},
+ {"id_isar0_el1", ID_ISAR0_EL1},
+ {"id_isar1_el1", ID_ISAR1_EL1},
+ {"id_isar2_el1", ID_ISAR2_EL1},
+ {"id_isar3_el1", ID_ISAR3_EL1},
+ {"id_isar4_el1", ID_ISAR4_EL1},
+ {"id_isar5_el1", ID_ISAR5_EL1},
+ {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"mvfr0_el1", MVFR0_EL1},
+ {"mvfr1_el1", MVFR1_EL1},
+ {"mvfr2_el1", MVFR2_EL1},
+ {"rvbar_el1", RVBAR_EL1},
+ {"rvbar_el2", RVBAR_EL2},
+ {"rvbar_el3", RVBAR_EL3},
+ {"isr_el1", ISR_EL1},
+ {"cntpct_el0", CNTPCT_EL0},
+ {"cntvct_el0", CNTVCT_EL0}
+};
+
+A64SysReg::MRSMapper::MRSMapper() {
+ InstPairs = &MRSPairs[0];
+ NumInstPairs = llvm::array_lengthof(MRSPairs);
+}
+
+const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+ {"dbgdtrtx_el0", DBGDTRTX_EL0},
+ {"oslar_el1", OSLAR_EL1},
+ {"pmswinc_el0", PMSWINC_EL0}
+};
+
+A64SysReg::MSRMapper::MSRMapper() {
+ InstPairs = &MSRPairs[0];
+ NumInstPairs = llvm::array_lengthof(MSRPairs);
+}
+
+
+const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+ {"osdtrrx_el1", OSDTRRX_EL1},
+ {"osdtrtx_el1", OSDTRTX_EL1},
+ {"teecr32_el1", TEECR32_EL1},
+ {"mdccint_el1", MDCCINT_EL1},
+ {"mdscr_el1", MDSCR_EL1},
+ {"dbgdtr_el0", DBGDTR_EL0},
+ {"oseccr_el1", OSECCR_EL1},
+ {"dbgvcr32_el2", DBGVCR32_EL2},
+ {"dbgbvr0_el1", DBGBVR0_EL1},
+ {"dbgbvr1_el1", DBGBVR1_EL1},
+ {"dbgbvr2_el1", DBGBVR2_EL1},
+ {"dbgbvr3_el1", DBGBVR3_EL1},
+ {"dbgbvr4_el1", DBGBVR4_EL1},
+ {"dbgbvr5_el1", DBGBVR5_EL1},
+ {"dbgbvr6_el1", DBGBVR6_EL1},
+ {"dbgbvr7_el1", DBGBVR7_EL1},
+ {"dbgbvr8_el1", DBGBVR8_EL1},
+ {"dbgbvr9_el1", DBGBVR9_EL1},
+ {"dbgbvr10_el1", DBGBVR10_EL1},
+ {"dbgbvr11_el1", DBGBVR11_EL1},
+ {"dbgbvr12_el1", DBGBVR12_EL1},
+ {"dbgbvr13_el1", DBGBVR13_EL1},
+ {"dbgbvr14_el1", DBGBVR14_EL1},
+ {"dbgbvr15_el1", DBGBVR15_EL1},
+ {"dbgbcr0_el1", DBGBCR0_EL1},
+ {"dbgbcr1_el1", DBGBCR1_EL1},
+ {"dbgbcr2_el1", DBGBCR2_EL1},
+ {"dbgbcr3_el1", DBGBCR3_EL1},
+ {"dbgbcr4_el1", DBGBCR4_EL1},
+ {"dbgbcr5_el1", DBGBCR5_EL1},
+ {"dbgbcr6_el1", DBGBCR6_EL1},
+ {"dbgbcr7_el1", DBGBCR7_EL1},
+ {"dbgbcr8_el1", DBGBCR8_EL1},
+ {"dbgbcr9_el1", DBGBCR9_EL1},
+ {"dbgbcr10_el1", DBGBCR10_EL1},
+ {"dbgbcr11_el1", DBGBCR11_EL1},
+ {"dbgbcr12_el1", DBGBCR12_EL1},
+ {"dbgbcr13_el1", DBGBCR13_EL1},
+ {"dbgbcr14_el1", DBGBCR14_EL1},
+ {"dbgbcr15_el1", DBGBCR15_EL1},
+ {"dbgwvr0_el1", DBGWVR0_EL1},
+ {"dbgwvr1_el1", DBGWVR1_EL1},
+ {"dbgwvr2_el1", DBGWVR2_EL1},
+ {"dbgwvr3_el1", DBGWVR3_EL1},
+ {"dbgwvr4_el1", DBGWVR4_EL1},
+ {"dbgwvr5_el1", DBGWVR5_EL1},
+ {"dbgwvr6_el1", DBGWVR6_EL1},
+ {"dbgwvr7_el1", DBGWVR7_EL1},
+ {"dbgwvr8_el1", DBGWVR8_EL1},
+ {"dbgwvr9_el1", DBGWVR9_EL1},
+ {"dbgwvr10_el1", DBGWVR10_EL1},
+ {"dbgwvr11_el1", DBGWVR11_EL1},
+ {"dbgwvr12_el1", DBGWVR12_EL1},
+ {"dbgwvr13_el1", DBGWVR13_EL1},
+ {"dbgwvr14_el1", DBGWVR14_EL1},
+ {"dbgwvr15_el1", DBGWVR15_EL1},
+ {"dbgwcr0_el1", DBGWCR0_EL1},
+ {"dbgwcr1_el1", DBGWCR1_EL1},
+ {"dbgwcr2_el1", DBGWCR2_EL1},
+ {"dbgwcr3_el1", DBGWCR3_EL1},
+ {"dbgwcr4_el1", DBGWCR4_EL1},
+ {"dbgwcr5_el1", DBGWCR5_EL1},
+ {"dbgwcr6_el1", DBGWCR6_EL1},
+ {"dbgwcr7_el1", DBGWCR7_EL1},
+ {"dbgwcr8_el1", DBGWCR8_EL1},
+ {"dbgwcr9_el1", DBGWCR9_EL1},
+ {"dbgwcr10_el1", DBGWCR10_EL1},
+ {"dbgwcr11_el1", DBGWCR11_EL1},
+ {"dbgwcr12_el1", DBGWCR12_EL1},
+ {"dbgwcr13_el1", DBGWCR13_EL1},
+ {"dbgwcr14_el1", DBGWCR14_EL1},
+ {"dbgwcr15_el1", DBGWCR15_EL1},
+ {"teehbr32_el1", TEEHBR32_EL1},
+ {"osdlr_el1", OSDLR_EL1},
+ {"dbgprcr_el1", DBGPRCR_EL1},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
+ {"csselr_el1", CSSELR_EL1},
+ {"vpidr_el2", VPIDR_EL2},
+ {"vmpidr_el2", VMPIDR_EL2},
+ {"sctlr_el1", SCTLR_EL1},
+ {"sctlr_el2", SCTLR_EL2},
+ {"sctlr_el3", SCTLR_EL3},
+ {"actlr_el1", ACTLR_EL1},
+ {"actlr_el2", ACTLR_EL2},
+ {"actlr_el3", ACTLR_EL3},
+ {"cpacr_el1", CPACR_EL1},
+ {"hcr_el2", HCR_EL2},
+ {"scr_el3", SCR_EL3},
+ {"mdcr_el2", MDCR_EL2},
+ {"sder32_el3", SDER32_EL3},
+ {"cptr_el2", CPTR_EL2},
+ {"cptr_el3", CPTR_EL3},
+ {"hstr_el2", HSTR_EL2},
+ {"hacr_el2", HACR_EL2},
+ {"mdcr_el3", MDCR_EL3},
+ {"ttbr0_el1", TTBR0_EL1},
+ {"ttbr0_el2", TTBR0_EL2},
+ {"ttbr0_el3", TTBR0_EL3},
+ {"ttbr1_el1", TTBR1_EL1},
+ {"tcr_el1", TCR_EL1},
+ {"tcr_el2", TCR_EL2},
+ {"tcr_el3", TCR_EL3},
+ {"vttbr_el2", VTTBR_EL2},
+ {"vtcr_el2", VTCR_EL2},
+ {"dacr32_el2", DACR32_EL2},
+ {"spsr_el1", SPSR_EL1},
+ {"spsr_el2", SPSR_EL2},
+ {"spsr_el3", SPSR_EL3},
+ {"elr_el1", ELR_EL1},
+ {"elr_el2", ELR_EL2},
+ {"elr_el3", ELR_EL3},
+ {"sp_el0", SP_EL0},
+ {"sp_el1", SP_EL1},
+ {"sp_el2", SP_EL2},
+ {"spsel", SPSel},
+ {"nzcv", NZCV},
+ {"daif", DAIF},
+ {"currentel", CurrentEL},
+ {"spsr_irq", SPSR_irq},
+ {"spsr_abt", SPSR_abt},
+ {"spsr_und", SPSR_und},
+ {"spsr_fiq", SPSR_fiq},
+ {"fpcr", FPCR},
+ {"fpsr", FPSR},
+ {"dspsr_el0", DSPSR_EL0},
+ {"dlr_el0", DLR_EL0},
+ {"ifsr32_el2", IFSR32_EL2},
+ {"afsr0_el1", AFSR0_EL1},
+ {"afsr0_el2", AFSR0_EL2},
+ {"afsr0_el3", AFSR0_EL3},
+ {"afsr1_el1", AFSR1_EL1},
+ {"afsr1_el2", AFSR1_EL2},
+ {"afsr1_el3", AFSR1_EL3},
+ {"esr_el1", ESR_EL1},
+ {"esr_el2", ESR_EL2},
+ {"esr_el3", ESR_EL3},
+ {"fpexc32_el2", FPEXC32_EL2},
+ {"far_el1", FAR_EL1},
+ {"far_el2", FAR_EL2},
+ {"far_el3", FAR_EL3},
+ {"hpfar_el2", HPFAR_EL2},
+ {"par_el1", PAR_EL1},
+ {"pmcr_el0", PMCR_EL0},
+ {"pmcntenset_el0", PMCNTENSET_EL0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0},
+ {"pmovsclr_el0", PMOVSCLR_EL0},
+ {"pmselr_el0", PMSELR_EL0},
+ {"pmccntr_el0", PMCCNTR_EL0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0},
+ {"pmuserenr_el0", PMUSERENR_EL0},
+ {"pmintenset_el1", PMINTENSET_EL1},
+ {"pmintenclr_el1", PMINTENCLR_EL1},
+ {"pmovsset_el0", PMOVSSET_EL0},
+ {"mair_el1", MAIR_EL1},
+ {"mair_el2", MAIR_EL2},
+ {"mair_el3", MAIR_EL3},
+ {"amair_el1", AMAIR_EL1},
+ {"amair_el2", AMAIR_EL2},
+ {"amair_el3", AMAIR_EL3},
+ {"vbar_el1", VBAR_EL1},
+ {"vbar_el2", VBAR_EL2},
+ {"vbar_el3", VBAR_EL3},
+ {"rmr_el1", RMR_EL1},
+ {"rmr_el2", RMR_EL2},
+ {"rmr_el3", RMR_EL3},
+ {"contextidr_el1", CONTEXTIDR_EL1},
+ {"tpidr_el0", TPIDR_EL0},
+ {"tpidr_el2", TPIDR_EL2},
+ {"tpidr_el3", TPIDR_EL3},
+ {"tpidrro_el0", TPIDRRO_EL0},
+ {"tpidr_el1", TPIDR_EL1},
+ {"cntfrq_el0", CNTFRQ_EL0},
+ {"cntvoff_el2", CNTVOFF_EL2},
+ {"cntkctl_el1", CNTKCTL_EL1},
+ {"cnthctl_el2", CNTHCTL_EL2},
+ {"cntp_tval_el0", CNTP_TVAL_EL0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1},
+ {"cntp_ctl_el0", CNTP_CTL_EL0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1},
+ {"cntp_cval_el0", CNTP_CVAL_EL0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1},
+ {"cntv_tval_el0", CNTV_TVAL_EL0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0},
+};
+
+uint32_t
+A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+  // First search the registers shared by both MRS and MSR.
+ std::string NameLower = Name.lower();
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return SysRegPairs[i].Value;
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Name == NameLower) {
+ Valid = true;
+ return InstPairs[i].Value;
+ }
+ }
+
+ // Try to parse an S<op0>_<op1>_<Cn>_<Cm>_<op2> register name, where the bits
+ // are: 11 xxx 1x11 xxxx xxx
+ Regex GenericRegPattern("^s3_([0-7])_c(1[15])_c([0-9]|1[0-5])_([0-7])$");
+
+ SmallVector<StringRef, 4> Ops;
+ if (!GenericRegPattern.match(NameLower, &Ops)) {
+ Valid = false;
+ return -1;
+ }
+
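+  // Pack the fields into the op0:op1:CRn:CRm:op2 encoding used by MRS/MSR
+  // (op0 is always 3 for these generic S3_* names).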
+ uint32_t Op0 = 3, Op1 = 0, CRn = 0, CRm = 0, Op2 = 0;
+ uint32_t Bits;
+ Ops[1].getAsInteger(10, Op1);
+ Ops[2].getAsInteger(10, CRn);
+ Ops[3].getAsInteger(10, CRm);
+ Ops[4].getAsInteger(10, Op2);
+ Bits = (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
+
+ Valid = true;
+ return Bits;
+}
+
+std::string
+A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
+ if (SysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return SysRegPairs[i].Name;
+ }
+ }
+
+ for (unsigned i = 0; i < NumInstPairs; ++i) {
+ if (InstPairs[i].Value == Bits) {
+ Valid = true;
+ return InstPairs[i].Name;
+ }
+ }
+
+ uint32_t Op0 = (Bits >> 14) & 0x3;
+ uint32_t Op1 = (Bits >> 11) & 0x7;
+ uint32_t CRn = (Bits >> 7) & 0xf;
+ uint32_t CRm = (Bits >> 3) & 0xf;
+ uint32_t Op2 = Bits & 0x7;
+
+ // Only combinations matching: 11 xxx 1x11 xxxx xxx are valid for a generic
+ // name.
+ if (Op0 != 3 || (CRn != 11 && CRn != 15)) {
+ Valid = false;
+ return "";
+ }
+
+ assert(Op0 == 3 && (CRn == 11 || CRn == 15) && "Invalid generic sysreg");
+
+ Valid = true;
+ return "s3_" + utostr(Op1) + "_c" + utostr(CRn)
+ + "_c" + utostr(CRm) + "_" + utostr(Op2);
+}
+
+const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+ {"ipas2e1is", IPAS2E1IS},
+ {"ipas2le1is", IPAS2LE1IS},
+ {"vmalle1is", VMALLE1IS},
+ {"alle2is", ALLE2IS},
+ {"alle3is", ALLE3IS},
+ {"vae1is", VAE1IS},
+ {"vae2is", VAE2IS},
+ {"vae3is", VAE3IS},
+ {"aside1is", ASIDE1IS},
+ {"vaae1is", VAAE1IS},
+ {"alle1is", ALLE1IS},
+ {"vale1is", VALE1IS},
+ {"vale2is", VALE2IS},
+ {"vale3is", VALE3IS},
+ {"vmalls12e1is", VMALLS12E1IS},
+ {"vaale1is", VAALE1IS},
+ {"ipas2e1", IPAS2E1},
+ {"ipas2le1", IPAS2LE1},
+ {"vmalle1", VMALLE1},
+ {"alle2", ALLE2},
+ {"alle3", ALLE3},
+ {"vae1", VAE1},
+ {"vae2", VAE2},
+ {"vae3", VAE3},
+ {"aside1", ASIDE1},
+ {"vaae1", VAAE1},
+ {"alle1", ALLE1},
+ {"vale1", VALE1},
+ {"vale2", VALE2},
+ {"vale3", VALE3},
+ {"vmalls12e1", VMALLS12E1},
+ {"vaale1", VAALE1}
+};
+
+A64TLBI::TLBIMapper::TLBIMapper()
+ : NamedImmMapper(TLBIPairs, 0) {}
+
+bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
+ const fltSemantics &Sem = Val.getSemantics();
+ unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
+
+ uint32_t ExpMask;
+ switch (FracBits) {
+ case 10: // IEEE half-precision
+ ExpMask = 0x1f;
+ break;
+ case 23: // IEEE single-precision
+ ExpMask = 0xff;
+ break;
+ case 52: // IEEE double-precision
+ ExpMask = 0x7ff;
+ break;
+ case 112: // IEEE quad-precision
+ // No immediates are valid for quad precision.
+ return false;
+ default:
+ llvm_unreachable("Only half, single and double precision supported");
+ }
+
+ uint32_t ExpStart = FracBits;
+ uint64_t FracMask = (1ULL << FracBits) - 1;
+
+ uint32_t Sign = Val.isNegative();
+
+ uint64_t Bits = Val.bitcastToAPInt().getLimitedValue();
+ uint64_t Fraction = Bits & FracMask;
+ int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
+ Exponent -= ExpMask >> 1;
+
+ // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
+ // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
+ // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
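+ // For example, +1.0 (sign 0, exponent 0, fraction 0) packs to imm8 == 0x70.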
+ uint64_t A64FracStart = FracBits - 4;
+ uint64_t A64FracMask = 0xf;
+
+ // Are there too many fraction bits?
+ if (Fraction & ~(A64FracMask << A64FracStart))
+ return false;
+
+ if (Exponent < -3 || Exponent > 4)
+ return false;
+
+ uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
+ uint32_t PackedExp = (Exponent + 7) & 0x7;
+
+ Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
+ return true;
+}
+
+// Encoding of the immediate for logical (immediate) instructions:
+//
+// | N | imms | immr | size | R | S |
+// |---+--------+--------+------+--------------+--------------|
+// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
+// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
+// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
+// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
+// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
+// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
+// | 0 | 11111x | - | | UNALLOCATED | |
+//
+// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
+// which the lower S+1 bits are ones and the remaining bits are zero, then
+// rotated right by R bits, which is then replicated across the datapath.
+//
+// + Values of 'N', 'imms' and 'immr' which do not match the above table are
+// RESERVED.
+// + If all 's' bits in the imms field are set then the instruction is
+// RESERVED.
+// + The 'x' bits in the 'immr' field are IGNORED.
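+//
+// For example, the 32-bit value 0x00ff00ff is the 16-bit pattern 0x00ff
+// replicated twice: a run of 8 ones with no rotation, which encodes as N = 0,
+// immr = 000000 and imms = 100111.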
+
+bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
+ int RepeatWidth;
+ int Rotation = 0;
+ int Num1s = 0;
+
+ // Because there are S+1 ones in the replicated mask, an immediate of all
+ // zeros is not allowed. Filtering it here is probably more efficient.
+ if (Imm == 0) return false;
+
+ for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
+ uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
+ uint64_t ReplicatedMask = Imm & RepeatMask;
+
+ if (ReplicatedMask == 0) continue;
+
+ // First we have to make sure the mask is actually repeated in each slot for
+ // this width-specifier.
+ bool IsReplicatedMask = true;
+ for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
+ if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
+ IsReplicatedMask = false;
+ break;
+ }
+ }
+ if (!IsReplicatedMask) continue;
+
+ // Now we have to work out the amount of rotation needed. The first part of
+ // this calculation is actually independent of RepeatWidth, but the complex
+ // case will depend on it.
+ Rotation = CountTrailingZeros_64(Imm);
+ if (Rotation == 0) {
+ // There were no trailing zeros, which means it's either in place or there
+ // are 1s at each end (e.g. 0x8003 needs rotating).
+ Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
+ : CountLeadingOnes_32(Imm);
+ Rotation = RepeatWidth - Rotation;
+ }
+
+ uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ // Of course, they may not actually be ones, so we have to check that:
+ if (!isMask_64(ReplicatedOnes))
+ continue;
+
+ Num1s = CountTrailingOnes_64(ReplicatedOnes);
+
+ // We know we've got an almost valid encoding (certainly, if this is invalid
+ // no other parameters would work).
+ break;
+ }
+
+ // The encodings which would produce all 1s are RESERVED.
+ if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
+
+ uint32_t N = RepeatWidth == 64;
+ uint32_t ImmR = RepeatWidth - Rotation;
+ uint32_t ImmS = Num1s - 1;
+
+ switch (RepeatWidth) {
+ default: break; // No action required for the 32- and 64-bit repeat widths.
+ case 16: ImmS |= 0x20; break; // 10ssss
+ case 8: ImmS |= 0x30; break; // 110sss
+ case 4: ImmS |= 0x38; break; // 1110ss
+ case 2: ImmS |= 0x3c; break; // 11110s
+ }
+
+ Bits = ImmS | (ImmR << 6) | (N << 12);
+
+ return true;
+}
+
+
+bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
+ uint64_t &Imm) {
+ uint32_t N = Bits >> 12;
+ uint32_t ImmR = (Bits >> 6) & 0x3f;
+ uint32_t ImmS = Bits & 0x3f;
+
+ // N=1 encodes a 64-bit replication and is invalid for the 32-bit
+ // instructions.
+ if (RegWidth == 32 && N != 0) return false;
+
+ int Width = 0;
+ if (N == 1)
+ Width = 64;
+ else if ((ImmS & 0x20) == 0)
+ Width = 32;
+ else if ((ImmS & 0x10) == 0)
+ Width = 16;
+ else if ((ImmS & 0x08) == 0)
+ Width = 8;
+ else if ((ImmS & 0x04) == 0)
+ Width = 4;
+ else if ((ImmS & 0x02) == 0)
+ Width = 2;
+ else {
+ // ImmS is 0b11111x: UNALLOCATED
+ return false;
+ }
+
+ int Num1s = (ImmS & (Width - 1)) + 1;
+
+ // All encodings which would map to -1 (signed) are RESERVED.
+ if (Num1s == Width) return false;
+
+ int Rotation = (ImmR & (Width - 1));
+ uint64_t Mask = (1ULL << Num1s) - 1;
+ uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
+
+ Imm = 0;
+ for (unsigned i = 0; i < RegWidth / Width; ++i) {
+ Imm |= Mask;
+ Mask <<= Width;
+ }
+
+ return true;
+}
+
+bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // If high bits are set then a 32-bit MOVZ can't possibly work.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ for (int i = 0; i < RegWidth; i += 16) {
+ // If the value is 0 when we mask out all the bits that could be set with
+ // the current LSL value then it's representable.
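+ // (e.g. for 0x12340000, masking with ~(0xffff << 16) leaves 0, so Shift == 1
+ // and UImm16 == 0x1234).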
+ if ((Value & ~(0xffffULL << i)) == 0) {
+ Shift = i / 16;
+ UImm16 = (Value >> i) & 0xffff;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
+ // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
+
+ // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
+ // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
+ // a valid input for isMOVZImm.
+ if (RegWidth == 32 && (Value & ~0xffffffffULL))
+ return false;
+
+ uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
+
+ return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
+}
+
+bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
+ int &UImm16, int &Shift) {
+ if (isMOVZImm(RegWidth, Value, UImm16, Shift))
+ return false;
+
+ return isMOVNImm(RegWidth, Value, UImm16, Shift);
+}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
new file mode 100644
index 0000000..5eebf44
--- /dev/null
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -0,0 +1,784 @@
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64 --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the AArch64 target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AARCH64_BASEINFO_H
+#define LLVM_AARCH64_BASEINFO_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// Enums corresponding to AArch64 condition codes
+namespace A64CC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ = 0, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Unsigned higher or same >, ==, or unordered
+ LO, // Unsigned lower Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Ordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Signed greater than Greater than
+ LE, // Signed less than or equal <, ==, or unordered
+ AL, // Always (unconditional) Always (unconditional)
+ NV, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution
+ // is "always".
+ Invalid
+ };
+
+} // namespace A64CC
+
+inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case A64CC::EQ: return "eq";
+ case A64CC::NE: return "ne";
+ case A64CC::HS: return "hs";
+ case A64CC::LO: return "lo";
+ case A64CC::MI: return "mi";
+ case A64CC::PL: return "pl";
+ case A64CC::VS: return "vs";
+ case A64CC::VC: return "vc";
+ case A64CC::HI: return "hi";
+ case A64CC::LS: return "ls";
+ case A64CC::GE: return "ge";
+ case A64CC::LT: return "lt";
+ case A64CC::GT: return "gt";
+ case A64CC::LE: return "le";
+ case A64CC::AL: return "al";
+ case A64CC::NV: return "nv";
+ }
+}
+
+inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
+ return StringSwitch<A64CC::CondCodes>(CondStr.lower())
+ .Case("eq", A64CC::EQ)
+ .Case("ne", A64CC::NE)
+ .Case("ne", A64CC::NE)
+ .Case("hs", A64CC::HS)
+ .Case("cs", A64CC::HS)
+ .Case("lo", A64CC::LO)
+ .Case("cc", A64CC::LO)
+ .Case("mi", A64CC::MI)
+ .Case("pl", A64CC::PL)
+ .Case("vs", A64CC::VS)
+ .Case("vc", A64CC::VC)
+ .Case("hi", A64CC::HI)
+ .Case("ls", A64CC::LS)
+ .Case("ge", A64CC::GE)
+ .Case("lt", A64CC::LT)
+ .Case("gt", A64CC::GT)
+ .Case("le", A64CC::LE)
+ .Case("al", A64CC::AL)
+ .Case("nv", A64CC::NV)
+ .Default(A64CC::Invalid);
+}
+
+inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
+ // It turns out that the condition codes have been designed so that in order
+ // to reverse the intent of the condition you only have to invert the low bit:
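+ // (e.g. EQ <-> NE, GE <-> LT, GT <-> LE).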
+
+ return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+}
+
+/// Instances of this class can perform bidirectional mapping from random
+/// identifier strings to operand encodings. For example "MSR" takes a named
+/// system-register which must be encoded somehow and decoded for printing. This
+/// central location means that the information for those transformations is not
+/// duplicated and remains in sync.
+///
+/// FIXME: currently the algorithm is a completely unoptimised linear
+/// search. Obviously this could be improved, but we would probably want to work
+/// out just how often these instructions are emitted before working on it. It
+/// might even be optimal to just reorder the tables for the common instructions
+/// rather than changing the algorithm.
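+///
+/// For example, a DMB/DSB barrier-operand mapper would pair the name "ish"
+/// with its encoding 0xb (A64DB::ISH).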
+struct NamedImmMapper {
+ struct Mapping {
+ const char *Name;
+ uint32_t Value;
+ };
+
+ template<int N>
+ NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+
+ StringRef toString(uint32_t Value, bool &Valid) const;
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+
+ /// Many of the instructions allow an alternative assembly form consisting of
+ /// a simple immediate. Currently the only valid forms are the ranges [0, N);
+ /// N == 0 indicates that no immediate syntax-form is allowed.
+ bool validImm(uint32_t Value) const;
+protected:
+ const Mapping *Pairs;
+ size_t NumPairs;
+ uint32_t TooBigImm;
+};
+
+namespace A64AT {
+ enum ATValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ S1E1R = 0x43c0, // 01 000 0111 1000 000
+ S1E2R = 0x63c0, // 01 100 0111 1000 000
+ S1E3R = 0x73c0, // 01 110 0111 1000 000
+ S1E1W = 0x43c1, // 01 000 0111 1000 001
+ S1E2W = 0x63c1, // 01 100 0111 1000 001
+ S1E3W = 0x73c1, // 01 110 0111 1000 001
+ S1E0R = 0x43c2, // 01 000 0111 1000 010
+ S1E0W = 0x43c3, // 01 000 0111 1000 011
+ S12E1R = 0x63c4, // 01 100 0111 1000 100
+ S12E1W = 0x63c5, // 01 100 0111 1000 101
+ S12E0R = 0x63c6, // 01 100 0111 1000 110
+ S12E0W = 0x63c7 // 01 100 0111 1000 111
+ };
+
+ struct ATMapper : NamedImmMapper {
+ const static Mapping ATPairs[];
+
+ ATMapper();
+ };
+
+}
+namespace A64DB {
+ enum DBValues {
+ Invalid = -1,
+ OSHLD = 0x1,
+ OSHST = 0x2,
+ OSH = 0x3,
+ NSHLD = 0x5,
+ NSHST = 0x6,
+ NSH = 0x7,
+ ISHLD = 0x9,
+ ISHST = 0xa,
+ ISH = 0xb,
+ LD = 0xd,
+ ST = 0xe,
+ SY = 0xf
+ };
+
+ struct DBarrierMapper : NamedImmMapper {
+ const static Mapping DBarrierPairs[];
+
+ DBarrierMapper();
+ };
+}
+
+namespace A64DC {
+ enum DCValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ ZVA = 0x5ba1, // 01 011 0111 0100 001
+ IVAC = 0x43b1, // 01 000 0111 0110 001
+ ISW = 0x43b2, // 01 000 0111 0110 010
+ CVAC = 0x5bd1, // 01 011 0111 1010 001
+ CSW = 0x43d2, // 01 000 0111 1010 010
+ CVAU = 0x5bd9, // 01 011 0111 1011 001
+ CIVAC = 0x5bf1, // 01 011 0111 1110 001
+ CISW = 0x43f2 // 01 000 0111 1110 010
+ };
+
+ struct DCMapper : NamedImmMapper {
+ const static Mapping DCPairs[];
+
+ DCMapper();
+ };
+
+}
+
+namespace A64IC {
+ enum ICValues {
+ Invalid = -1, // Op1 CRn CRm Op2
+ IALLUIS = 0x0388, // 000 0111 0001 000
+ IALLU = 0x03a8, // 000 0111 0101 000
+ IVAU = 0x1ba9 // 011 0111 0101 001
+ };
+
+
+ struct ICMapper : NamedImmMapper {
+ const static Mapping ICPairs[];
+
+ ICMapper();
+ };
+
+ static inline bool NeedsRegister(ICValues Val) {
+ return Val == IVAU;
+ }
+}
+
+namespace A64ISB {
+ enum ISBValues {
+ Invalid = -1,
+ SY = 0xf
+ };
+ struct ISBMapper : NamedImmMapper {
+ const static Mapping ISBPairs[];
+
+ ISBMapper();
+ };
+}
+
+namespace A64PRFM {
+ enum PRFMValues {
+ Invalid = -1,
+ PLDL1KEEP = 0x00,
+ PLDL1STRM = 0x01,
+ PLDL2KEEP = 0x02,
+ PLDL2STRM = 0x03,
+ PLDL3KEEP = 0x04,
+ PLDL3STRM = 0x05,
+ PLIL1KEEP = 0x08,
+ PLIL1STRM = 0x09,
+ PLIL2KEEP = 0x0a,
+ PLIL2STRM = 0x0b,
+ PLIL3KEEP = 0x0c,
+ PLIL3STRM = 0x0d,
+ PSTL1KEEP = 0x10,
+ PSTL1STRM = 0x11,
+ PSTL2KEEP = 0x12,
+ PSTL2STRM = 0x13,
+ PSTL3KEEP = 0x14,
+ PSTL3STRM = 0x15
+ };
+
+ struct PRFMMapper : NamedImmMapper {
+ const static Mapping PRFMPairs[];
+
+ PRFMMapper();
+ };
+}
+
+namespace A64PState {
+ enum PStateValues {
+ Invalid = -1,
+ SPSel = 0x05,
+ DAIFSet = 0x1e,
+ DAIFClr = 0x1f
+ };
+
+ struct PStateMapper : NamedImmMapper {
+ const static Mapping PStatePairs[];
+
+ PStateMapper();
+ };
+
+}
+
+namespace A64SE {
+ enum ShiftExtSpecifiers {
+ Invalid = -1,
+ LSL,
+ LSR,
+ ASR,
+ ROR,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX
+ };
+}
+
+namespace A64SysReg {
+ enum SysRegROValues {
+ MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
+ DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
+ MDRAR_EL1 = 0x8080, // 10 000 0001 0000 000
+ OSLSR_EL1 = 0x808c, // 10 000 0001 0001 100
+ DBGAUTHSTATUS_EL1 = 0x83f6, // 10 000 0111 1110 110
+ PMCEID0_EL0 = 0xdce6, // 11 011 1001 1100 110
+ PMCEID1_EL0 = 0xdce7, // 11 011 1001 1100 111
+ MIDR_EL1 = 0xc000, // 11 000 0000 0000 000
+ CCSIDR_EL1 = 0xc800, // 11 001 0000 0000 000
+ CLIDR_EL1 = 0xc801, // 11 001 0000 0000 001
+ CTR_EL0 = 0xd801, // 11 011 0000 0000 001
+ MPIDR_EL1 = 0xc005, // 11 000 0000 0000 101
+ REVIDR_EL1 = 0xc006, // 11 000 0000 0000 110
+ AIDR_EL1 = 0xc807, // 11 001 0000 0000 111
+ DCZID_EL0 = 0xd807, // 11 011 0000 0000 111
+ ID_PFR0_EL1 = 0xc008, // 11 000 0000 0001 000
+ ID_PFR1_EL1 = 0xc009, // 11 000 0000 0001 001
+ ID_DFR0_EL1 = 0xc00a, // 11 000 0000 0001 010
+ ID_AFR0_EL1 = 0xc00b, // 11 000 0000 0001 011
+ ID_MMFR0_EL1 = 0xc00c, // 11 000 0000 0001 100
+ ID_MMFR1_EL1 = 0xc00d, // 11 000 0000 0001 101
+ ID_MMFR2_EL1 = 0xc00e, // 11 000 0000 0001 110
+ ID_MMFR3_EL1 = 0xc00f, // 11 000 0000 0001 111
+ ID_ISAR0_EL1 = 0xc010, // 11 000 0000 0010 000
+ ID_ISAR1_EL1 = 0xc011, // 11 000 0000 0010 001
+ ID_ISAR2_EL1 = 0xc012, // 11 000 0000 0010 010
+ ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
+ ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
+ ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
+ ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
+ MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
+ MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
+ RVBAR_EL1 = 0xc601, // 11 000 1100 0000 001
+ RVBAR_EL2 = 0xe601, // 11 100 1100 0000 001
+ RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001
+ ISR_EL1 = 0xc608, // 11 000 1100 0001 000
+ CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001
+ CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010
+ };
+
+ enum SysRegWOValues {
+ DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000
+ OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100
+ PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100
+ };
+
+ enum SysRegValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ OSDTRRX_EL1 = 0x8002, // 10 000 0000 0000 010
+ OSDTRTX_EL1 = 0x801a, // 10 000 0000 0011 010
+ TEECR32_EL1 = 0x9000, // 10 010 0000 0000 000
+ MDCCINT_EL1 = 0x8010, // 10 000 0000 0010 000
+ MDSCR_EL1 = 0x8012, // 10 000 0000 0010 010
+ DBGDTR_EL0 = 0x9820, // 10 011 0000 0100 000
+ OSECCR_EL1 = 0x8032, // 10 000 0000 0110 010
+ DBGVCR32_EL2 = 0xa038, // 10 100 0000 0111 000
+ DBGBVR0_EL1 = 0x8004, // 10 000 0000 0000 100
+ DBGBVR1_EL1 = 0x800c, // 10 000 0000 0001 100
+ DBGBVR2_EL1 = 0x8014, // 10 000 0000 0010 100
+ DBGBVR3_EL1 = 0x801c, // 10 000 0000 0011 100
+ DBGBVR4_EL1 = 0x8024, // 10 000 0000 0100 100
+ DBGBVR5_EL1 = 0x802c, // 10 000 0000 0101 100
+ DBGBVR6_EL1 = 0x8034, // 10 000 0000 0110 100
+ DBGBVR7_EL1 = 0x803c, // 10 000 0000 0111 100
+ DBGBVR8_EL1 = 0x8044, // 10 000 0000 1000 100
+ DBGBVR9_EL1 = 0x804c, // 10 000 0000 1001 100
+ DBGBVR10_EL1 = 0x8054, // 10 000 0000 1010 100
+ DBGBVR11_EL1 = 0x805c, // 10 000 0000 1011 100
+ DBGBVR12_EL1 = 0x8064, // 10 000 0000 1100 100
+ DBGBVR13_EL1 = 0x806c, // 10 000 0000 1101 100
+ DBGBVR14_EL1 = 0x8074, // 10 000 0000 1110 100
+ DBGBVR15_EL1 = 0x807c, // 10 000 0000 1111 100
+ DBGBCR0_EL1 = 0x8005, // 10 000 0000 0000 101
+ DBGBCR1_EL1 = 0x800d, // 10 000 0000 0001 101
+ DBGBCR2_EL1 = 0x8015, // 10 000 0000 0010 101
+ DBGBCR3_EL1 = 0x801d, // 10 000 0000 0011 101
+ DBGBCR4_EL1 = 0x8025, // 10 000 0000 0100 101
+ DBGBCR5_EL1 = 0x802d, // 10 000 0000 0101 101
+ DBGBCR6_EL1 = 0x8035, // 10 000 0000 0110 101
+ DBGBCR7_EL1 = 0x803d, // 10 000 0000 0111 101
+ DBGBCR8_EL1 = 0x8045, // 10 000 0000 1000 101
+ DBGBCR9_EL1 = 0x804d, // 10 000 0000 1001 101
+ DBGBCR10_EL1 = 0x8055, // 10 000 0000 1010 101
+ DBGBCR11_EL1 = 0x805d, // 10 000 0000 1011 101
+ DBGBCR12_EL1 = 0x8065, // 10 000 0000 1100 101
+ DBGBCR13_EL1 = 0x806d, // 10 000 0000 1101 101
+ DBGBCR14_EL1 = 0x8075, // 10 000 0000 1110 101
+ DBGBCR15_EL1 = 0x807d, // 10 000 0000 1111 101
+ DBGWVR0_EL1 = 0x8006, // 10 000 0000 0000 110
+ DBGWVR1_EL1 = 0x800e, // 10 000 0000 0001 110
+ DBGWVR2_EL1 = 0x8016, // 10 000 0000 0010 110
+ DBGWVR3_EL1 = 0x801e, // 10 000 0000 0011 110
+ DBGWVR4_EL1 = 0x8026, // 10 000 0000 0100 110
+ DBGWVR5_EL1 = 0x802e, // 10 000 0000 0101 110
+ DBGWVR6_EL1 = 0x8036, // 10 000 0000 0110 110
+ DBGWVR7_EL1 = 0x803e, // 10 000 0000 0111 110
+ DBGWVR8_EL1 = 0x8046, // 10 000 0000 1000 110
+ DBGWVR9_EL1 = 0x804e, // 10 000 0000 1001 110
+ DBGWVR10_EL1 = 0x8056, // 10 000 0000 1010 110
+ DBGWVR11_EL1 = 0x805e, // 10 000 0000 1011 110
+ DBGWVR12_EL1 = 0x8066, // 10 000 0000 1100 110
+ DBGWVR13_EL1 = 0x806e, // 10 000 0000 1101 110
+ DBGWVR14_EL1 = 0x8076, // 10 000 0000 1110 110
+ DBGWVR15_EL1 = 0x807e, // 10 000 0000 1111 110
+ DBGWCR0_EL1 = 0x8007, // 10 000 0000 0000 111
+ DBGWCR1_EL1 = 0x800f, // 10 000 0000 0001 111
+ DBGWCR2_EL1 = 0x8017, // 10 000 0000 0010 111
+ DBGWCR3_EL1 = 0x801f, // 10 000 0000 0011 111
+ DBGWCR4_EL1 = 0x8027, // 10 000 0000 0100 111
+ DBGWCR5_EL1 = 0x802f, // 10 000 0000 0101 111
+ DBGWCR6_EL1 = 0x8037, // 10 000 0000 0110 111
+ DBGWCR7_EL1 = 0x803f, // 10 000 0000 0111 111
+ DBGWCR8_EL1 = 0x8047, // 10 000 0000 1000 111
+ DBGWCR9_EL1 = 0x804f, // 10 000 0000 1001 111
+ DBGWCR10_EL1 = 0x8057, // 10 000 0000 1010 111
+ DBGWCR11_EL1 = 0x805f, // 10 000 0000 1011 111
+ DBGWCR12_EL1 = 0x8067, // 10 000 0000 1100 111
+ DBGWCR13_EL1 = 0x806f, // 10 000 0000 1101 111
+ DBGWCR14_EL1 = 0x8077, // 10 000 0000 1110 111
+ DBGWCR15_EL1 = 0x807f, // 10 000 0000 1111 111
+ TEEHBR32_EL1 = 0x9080, // 10 010 0001 0000 000
+ OSDLR_EL1 = 0x809c, // 10 000 0001 0011 100
+ DBGPRCR_EL1 = 0x80a4, // 10 000 0001 0100 100
+ DBGCLAIMSET_EL1 = 0x83c6, // 10 000 0111 1000 110
+ DBGCLAIMCLR_EL1 = 0x83ce, // 10 000 0111 1001 110
+ CSSELR_EL1 = 0xd000, // 11 010 0000 0000 000
+ VPIDR_EL2 = 0xe000, // 11 100 0000 0000 000
+ VMPIDR_EL2 = 0xe005, // 11 100 0000 0000 101
+ CPACR_EL1 = 0xc082, // 11 000 0001 0000 010
+ SCTLR_EL1 = 0xc080, // 11 000 0001 0000 000
+ SCTLR_EL2 = 0xe080, // 11 100 0001 0000 000
+ SCTLR_EL3 = 0xf080, // 11 110 0001 0000 000
+ ACTLR_EL1 = 0xc081, // 11 000 0001 0000 001
+ ACTLR_EL2 = 0xe081, // 11 100 0001 0000 001
+ ACTLR_EL3 = 0xf081, // 11 110 0001 0000 001
+ HCR_EL2 = 0xe088, // 11 100 0001 0001 000
+ SCR_EL3 = 0xf088, // 11 110 0001 0001 000
+ MDCR_EL2 = 0xe089, // 11 100 0001 0001 001
+ SDER32_EL3 = 0xf089, // 11 110 0001 0001 001
+ CPTR_EL2 = 0xe08a, // 11 100 0001 0001 010
+ CPTR_EL3 = 0xf08a, // 11 110 0001 0001 010
+ HSTR_EL2 = 0xe08b, // 11 100 0001 0001 011
+ HACR_EL2 = 0xe08f, // 11 100 0001 0001 111
+ MDCR_EL3 = 0xf099, // 11 110 0001 0011 001
+ TTBR0_EL1 = 0xc100, // 11 000 0010 0000 000
+ TTBR0_EL2 = 0xe100, // 11 100 0010 0000 000
+ TTBR0_EL3 = 0xf100, // 11 110 0010 0000 000
+ TTBR1_EL1 = 0xc101, // 11 000 0010 0000 001
+ TCR_EL1 = 0xc102, // 11 000 0010 0000 010
+ TCR_EL2 = 0xe102, // 11 100 0010 0000 010
+ TCR_EL3 = 0xf102, // 11 110 0010 0000 010
+ VTTBR_EL2 = 0xe108, // 11 100 0010 0001 000
+ VTCR_EL2 = 0xe10a, // 11 100 0010 0001 010
+ DACR32_EL2 = 0xe180, // 11 100 0011 0000 000
+ SPSR_EL1 = 0xc200, // 11 000 0100 0000 000
+ SPSR_EL2 = 0xe200, // 11 100 0100 0000 000
+ SPSR_EL3 = 0xf200, // 11 110 0100 0000 000
+ ELR_EL1 = 0xc201, // 11 000 0100 0000 001
+ ELR_EL2 = 0xe201, // 11 100 0100 0000 001
+ ELR_EL3 = 0xf201, // 11 110 0100 0000 001
+ SP_EL0 = 0xc208, // 11 000 0100 0001 000
+ SP_EL1 = 0xe208, // 11 100 0100 0001 000
+ SP_EL2 = 0xf208, // 11 110 0100 0001 000
+ SPSel = 0xc210, // 11 000 0100 0010 000
+ NZCV = 0xda10, // 11 011 0100 0010 000
+ DAIF = 0xda11, // 11 011 0100 0010 001
+ CurrentEL = 0xc212, // 11 000 0100 0010 010
+ SPSR_irq = 0xe218, // 11 100 0100 0011 000
+ SPSR_abt = 0xe219, // 11 100 0100 0011 001
+ SPSR_und = 0xe21a, // 11 100 0100 0011 010
+ SPSR_fiq = 0xe21b, // 11 100 0100 0011 011
+ FPCR = 0xda20, // 11 011 0100 0100 000
+ FPSR = 0xda21, // 11 011 0100 0100 001
+ DSPSR_EL0 = 0xda28, // 11 011 0100 0101 000
+ DLR_EL0 = 0xda29, // 11 011 0100 0101 001
+ IFSR32_EL2 = 0xe281, // 11 100 0101 0000 001
+ AFSR0_EL1 = 0xc288, // 11 000 0101 0001 000
+ AFSR0_EL2 = 0xe288, // 11 100 0101 0001 000
+ AFSR0_EL3 = 0xf288, // 11 110 0101 0001 000
+ AFSR1_EL1 = 0xc289, // 11 000 0101 0001 001
+ AFSR1_EL2 = 0xe289, // 11 100 0101 0001 001
+ AFSR1_EL3 = 0xf289, // 11 110 0101 0001 001
+ ESR_EL1 = 0xc290, // 11 000 0101 0010 000
+ ESR_EL2 = 0xe290, // 11 100 0101 0010 000
+ ESR_EL3 = 0xf290, // 11 110 0101 0010 000
+ FPEXC32_EL2 = 0xe298, // 11 100 0101 0011 000
+ FAR_EL1 = 0xc300, // 11 000 0110 0000 000
+ FAR_EL2 = 0xe300, // 11 100 0110 0000 000
+ FAR_EL3 = 0xf300, // 11 110 0110 0000 000
+ HPFAR_EL2 = 0xe304, // 11 100 0110 0000 100
+ PAR_EL1 = 0xc3a0, // 11 000 0111 0100 000
+ PMCR_EL0 = 0xdce0, // 11 011 1001 1100 000
+ PMCNTENSET_EL0 = 0xdce1, // 11 011 1001 1100 001
+ PMCNTENCLR_EL0 = 0xdce2, // 11 011 1001 1100 010
+ PMOVSCLR_EL0 = 0xdce3, // 11 011 1001 1100 011
+ PMSELR_EL0 = 0xdce5, // 11 011 1001 1100 101
+ PMCCNTR_EL0 = 0xdce8, // 11 011 1001 1101 000
+ PMXEVTYPER_EL0 = 0xdce9, // 11 011 1001 1101 001
+ PMXEVCNTR_EL0 = 0xdcea, // 11 011 1001 1101 010
+ PMUSERENR_EL0 = 0xdcf0, // 11 011 1001 1110 000
+ PMINTENSET_EL1 = 0xc4f1, // 11 000 1001 1110 001
+ PMINTENCLR_EL1 = 0xc4f2, // 11 000 1001 1110 010
+ PMOVSSET_EL0 = 0xdcf3, // 11 011 1001 1110 011
+ MAIR_EL1 = 0xc510, // 11 000 1010 0010 000
+ MAIR_EL2 = 0xe510, // 11 100 1010 0010 000
+ MAIR_EL3 = 0xf510, // 11 110 1010 0010 000
+ AMAIR_EL1 = 0xc518, // 11 000 1010 0011 000
+ AMAIR_EL2 = 0xe518, // 11 100 1010 0011 000
+ AMAIR_EL3 = 0xf518, // 11 110 1010 0011 000
+ VBAR_EL1 = 0xc600, // 11 000 1100 0000 000
+ VBAR_EL2 = 0xe600, // 11 100 1100 0000 000
+ VBAR_EL3 = 0xf600, // 11 110 1100 0000 000
+ RMR_EL1 = 0xc602, // 11 000 1100 0000 010
+ RMR_EL2 = 0xe602, // 11 100 1100 0000 010
+ RMR_EL3 = 0xf602, // 11 110 1100 0000 010
+ CONTEXTIDR_EL1 = 0xc681, // 11 000 1101 0000 001
+ TPIDR_EL0 = 0xde82, // 11 011 1101 0000 010
+ TPIDR_EL2 = 0xe682, // 11 100 1101 0000 010
+ TPIDR_EL3 = 0xf682, // 11 110 1101 0000 010
+ TPIDRRO_EL0 = 0xde83, // 11 011 1101 0000 011
+ TPIDR_EL1 = 0xc684, // 11 000 1101 0000 100
+ CNTFRQ_EL0 = 0xdf00, // 11 011 1110 0000 000
+ CNTVOFF_EL2 = 0xe703, // 11 100 1110 0000 011
+ CNTKCTL_EL1 = 0xc708, // 11 000 1110 0001 000
+ CNTHCTL_EL2 = 0xe708, // 11 100 1110 0001 000
+ CNTP_TVAL_EL0 = 0xdf10, // 11 011 1110 0010 000
+ CNTHP_TVAL_EL2 = 0xe710, // 11 100 1110 0010 000
+ CNTPS_TVAL_EL1 = 0xff10, // 11 111 1110 0010 000
+ CNTP_CTL_EL0 = 0xdf11, // 11 011 1110 0010 001
+ CNTHP_CTL_EL2 = 0xe711, // 11 100 1110 0010 001
+ CNTPS_CTL_EL1 = 0xff11, // 11 111 1110 0010 001
+ CNTP_CVAL_EL0 = 0xdf12, // 11 011 1110 0010 010
+ CNTHP_CVAL_EL2 = 0xe712, // 11 100 1110 0010 010
+ CNTPS_CVAL_EL1 = 0xff12, // 11 111 1110 0010 010
+ CNTV_TVAL_EL0 = 0xdf18, // 11 011 1110 0011 000
+ CNTV_CTL_EL0 = 0xdf19, // 11 011 1110 0011 001
+ CNTV_CVAL_EL0 = 0xdf1a, // 11 011 1110 0011 010
+ PMEVCNTR0_EL0 = 0xdf40, // 11 011 1110 1000 000
+ PMEVCNTR1_EL0 = 0xdf41, // 11 011 1110 1000 001
+ PMEVCNTR2_EL0 = 0xdf42, // 11 011 1110 1000 010
+ PMEVCNTR3_EL0 = 0xdf43, // 11 011 1110 1000 011
+ PMEVCNTR4_EL0 = 0xdf44, // 11 011 1110 1000 100
+ PMEVCNTR5_EL0 = 0xdf45, // 11 011 1110 1000 101
+ PMEVCNTR6_EL0 = 0xdf46, // 11 011 1110 1000 110
+ PMEVCNTR7_EL0 = 0xdf47, // 11 011 1110 1000 111
+ PMEVCNTR8_EL0 = 0xdf48, // 11 011 1110 1001 000
+ PMEVCNTR9_EL0 = 0xdf49, // 11 011 1110 1001 001
+ PMEVCNTR10_EL0 = 0xdf4a, // 11 011 1110 1001 010
+ PMEVCNTR11_EL0 = 0xdf4b, // 11 011 1110 1001 011
+ PMEVCNTR12_EL0 = 0xdf4c, // 11 011 1110 1001 100
+ PMEVCNTR13_EL0 = 0xdf4d, // 11 011 1110 1001 101
+ PMEVCNTR14_EL0 = 0xdf4e, // 11 011 1110 1001 110
+ PMEVCNTR15_EL0 = 0xdf4f, // 11 011 1110 1001 111
+ PMEVCNTR16_EL0 = 0xdf50, // 11 011 1110 1010 000
+ PMEVCNTR17_EL0 = 0xdf51, // 11 011 1110 1010 001
+ PMEVCNTR18_EL0 = 0xdf52, // 11 011 1110 1010 010
+ PMEVCNTR19_EL0 = 0xdf53, // 11 011 1110 1010 011
+ PMEVCNTR20_EL0 = 0xdf54, // 11 011 1110 1010 100
+ PMEVCNTR21_EL0 = 0xdf55, // 11 011 1110 1010 101
+ PMEVCNTR22_EL0 = 0xdf56, // 11 011 1110 1010 110
+ PMEVCNTR23_EL0 = 0xdf57, // 11 011 1110 1010 111
+ PMEVCNTR24_EL0 = 0xdf58, // 11 011 1110 1011 000
+ PMEVCNTR25_EL0 = 0xdf59, // 11 011 1110 1011 001
+ PMEVCNTR26_EL0 = 0xdf5a, // 11 011 1110 1011 010
+ PMEVCNTR27_EL0 = 0xdf5b, // 11 011 1110 1011 011
+ PMEVCNTR28_EL0 = 0xdf5c, // 11 011 1110 1011 100
+ PMEVCNTR29_EL0 = 0xdf5d, // 11 011 1110 1011 101
+ PMEVCNTR30_EL0 = 0xdf5e, // 11 011 1110 1011 110
+ PMCCFILTR_EL0 = 0xdf7f, // 11 011 1110 1111 111
+ PMEVTYPER0_EL0 = 0xdf60, // 11 011 1110 1100 000
+ PMEVTYPER1_EL0 = 0xdf61, // 11 011 1110 1100 001
+ PMEVTYPER2_EL0 = 0xdf62, // 11 011 1110 1100 010
+ PMEVTYPER3_EL0 = 0xdf63, // 11 011 1110 1100 011
+ PMEVTYPER4_EL0 = 0xdf64, // 11 011 1110 1100 100
+ PMEVTYPER5_EL0 = 0xdf65, // 11 011 1110 1100 101
+ PMEVTYPER6_EL0 = 0xdf66, // 11 011 1110 1100 110
+ PMEVTYPER7_EL0 = 0xdf67, // 11 011 1110 1100 111
+ PMEVTYPER8_EL0 = 0xdf68, // 11 011 1110 1101 000
+ PMEVTYPER9_EL0 = 0xdf69, // 11 011 1110 1101 001
+ PMEVTYPER10_EL0 = 0xdf6a, // 11 011 1110 1101 010
+ PMEVTYPER11_EL0 = 0xdf6b, // 11 011 1110 1101 011
+ PMEVTYPER12_EL0 = 0xdf6c, // 11 011 1110 1101 100
+ PMEVTYPER13_EL0 = 0xdf6d, // 11 011 1110 1101 101
+ PMEVTYPER14_EL0 = 0xdf6e, // 11 011 1110 1101 110
+ PMEVTYPER15_EL0 = 0xdf6f, // 11 011 1110 1101 111
+ PMEVTYPER16_EL0 = 0xdf70, // 11 011 1110 1110 000
+ PMEVTYPER17_EL0 = 0xdf71, // 11 011 1110 1110 001
+ PMEVTYPER18_EL0 = 0xdf72, // 11 011 1110 1110 010
+ PMEVTYPER19_EL0 = 0xdf73, // 11 011 1110 1110 011
+ PMEVTYPER20_EL0 = 0xdf74, // 11 011 1110 1110 100
+ PMEVTYPER21_EL0 = 0xdf75, // 11 011 1110 1110 101
+ PMEVTYPER22_EL0 = 0xdf76, // 11 011 1110 1110 110
+ PMEVTYPER23_EL0 = 0xdf77, // 11 011 1110 1110 111
+ PMEVTYPER24_EL0 = 0xdf78, // 11 011 1110 1111 000
+ PMEVTYPER25_EL0 = 0xdf79, // 11 011 1110 1111 001
+ PMEVTYPER26_EL0 = 0xdf7a, // 11 011 1110 1111 010
+ PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011
+ PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100
+ PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101
+ PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110
+ };
+
+ // Note that these do not inherit from NamedImmMapper. This class is
+ // sufficiently different in its behaviour that I don't believe it's worth
+ // burdening the common NamedImmMapper with abstractions only needed in
+ // this one case.
+ struct SysRegMapper {
+ static const NamedImmMapper::Mapping SysRegPairs[];
+
+ const NamedImmMapper::Mapping *InstPairs;
+ size_t NumInstPairs;
+
+ SysRegMapper() {}
+ uint32_t fromString(StringRef Name, bool &Valid) const;
+ std::string toString(uint32_t Bits, bool &Valid) const;
+ };
+
+ struct MSRMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper();
+ };
+
+ struct MRSMapper : SysRegMapper {
+ static const NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper();
+ };
+
+ uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
+}
+
+namespace A64TLBI {
+ enum TLBIValues {
+ Invalid = -1, // Op0 Op1 CRn CRm Op2
+ IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
+ IPAS2LE1IS = 0x6405, // 01 100 1000 0000 101
+ VMALLE1IS = 0x4418, // 01 000 1000 0011 000
+ ALLE2IS = 0x6418, // 01 100 1000 0011 000
+ ALLE3IS = 0x7418, // 01 110 1000 0011 000
+ VAE1IS = 0x4419, // 01 000 1000 0011 001
+ VAE2IS = 0x6419, // 01 100 1000 0011 001
+ VAE3IS = 0x7419, // 01 110 1000 0011 001
+ ASIDE1IS = 0x441a, // 01 000 1000 0011 010
+ VAAE1IS = 0x441b, // 01 000 1000 0011 011
+ ALLE1IS = 0x641c, // 01 100 1000 0011 100
+ VALE1IS = 0x441d, // 01 000 1000 0011 101
+ VALE2IS = 0x641d, // 01 100 1000 0011 101
+ VALE3IS = 0x741d, // 01 110 1000 0011 101
+ VMALLS12E1IS = 0x641e, // 01 100 1000 0011 110
+ VAALE1IS = 0x441f, // 01 000 1000 0011 111
+ IPAS2E1 = 0x6421, // 01 100 1000 0100 001
+ IPAS2LE1 = 0x6425, // 01 100 1000 0100 101
+ VMALLE1 = 0x4438, // 01 000 1000 0111 000
+ ALLE2 = 0x6438, // 01 100 1000 0111 000
+ ALLE3 = 0x7438, // 01 110 1000 0111 000
+ VAE1 = 0x4439, // 01 000 1000 0111 001
+ VAE2 = 0x6439, // 01 100 1000 0111 001
+ VAE3 = 0x7439, // 01 110 1000 0111 001
+ ASIDE1 = 0x443a, // 01 000 1000 0111 010
+ VAAE1 = 0x443b, // 01 000 1000 0111 011
+ ALLE1 = 0x643c, // 01 100 1000 0111 100
+ VALE1 = 0x443d, // 01 000 1000 0111 101
+ VALE2 = 0x643d, // 01 100 1000 0111 101
+ VALE3 = 0x743d, // 01 110 1000 0111 101
+ VMALLS12E1 = 0x643e, // 01 100 1000 0111 110
+ VAALE1 = 0x443f // 01 000 1000 0111 111
+ };
+
+ struct TLBIMapper : NamedImmMapper {
+ const static Mapping TLBIPairs[];
+
+ TLBIMapper();
+ };
+
+ static inline bool NeedsRegister(TLBIValues Val) {
+ switch (Val) {
+ case VMALLE1IS:
+ case ALLE2IS:
+ case ALLE3IS:
+ case ALLE1IS:
+ case VMALLS12E1IS:
+ case VMALLE1:
+ case ALLE2:
+ case ALLE3:
+ case ALLE1:
+ case VMALLS12E1:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+namespace AArch64II {
+
+ enum TOF {
+ //===--------------------------------------------------------------===//
+ // AArch64 Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ // MO_GOT - Represents a relocation referring to the GOT entry of a given
+ // symbol. Used in adrp.
+ MO_GOT,
+
+ // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
+ // GOT entry of a given symbol. Used in ldr only.
+ MO_GOT_LO12,
+
+ // MO_DTPREL_* - Represents a relocation referring to the offset from a
+ // module's dynamic thread pointer. Used in the local-dynamic TLS access
+ // model.
+ MO_DTPREL_G1,
+ MO_DTPREL_G0_NC,
+
+ // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
+ // providing the offset of a variable from the thread-pointer. Used in
+ // initial-exec TLS model where this offset is assigned in the static thread
+ // block and thus known by the dynamic linker.
+ MO_GOTTPREL,
+ MO_GOTTPREL_LO12,
+
+ // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
+ // a TLS descriptor chosen by the dynamic linker. Used for the
+ // general-dynamic and local-dynamic TLS access models where very little is
+ // known at link-time.
+ MO_TLSDESC,
+ MO_TLSDESC_LO12,
+
+ // MO_TPREL_* - Represents a relocation referring to the offset of a
+ // variable from the thread pointer itself. Used in the local-exec TLS
+ // access model.
+ MO_TPREL_G1,
+ MO_TPREL_G0_NC,
+
+ // MO_LO12 - On a symbol operand, this represents a relocation containing
+ // lower 12 bits of the address. Used in add/sub/ldr/str.
+ MO_LO12
+ };
+}
+
+class APFloat;
+
+namespace A64Imms {
+ bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
+
+ inline bool isFPImm(const APFloat &Val) {
+ uint32_t Imm8;
+ return isFPImm(Val, Imm8);
+ }
+
+ bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
+ bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
+
+ bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+ bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+ // We sometimes want to know whether the immediate is representable with a
+ // MOVN but *not* with a MOVZ (because that would take priority).
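+ // For example, the 32-bit value 0xffff1234 is MOVN-only: neither of its
+ // 16-bit chunks is zero, but its complement 0x0000edcb fits a single MOVZ.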
+ bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
+
+}
+
+} // end namespace llvm;
+
+#endif
diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt
new file mode 100644
index 0000000..2c28348
--- /dev/null
+++ b/lib/Target/AArch64/Utils/CMakeLists.txt
@@ -0,0 +1,5 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAArch64Utils
+ AArch64BaseInfo.cpp
+ )
diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt
new file mode 100644
index 0000000..1be5375
--- /dev/null
+++ b/lib/Target/AArch64/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AArch64Utils
+parent = AArch64
+required_libraries = Core Support
+add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile
new file mode 100644
index 0000000..0f4a645
--- /dev/null
+++ b/lib/Target/AArch64/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/AArch64/Utils/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAArch64Utils
+
+# Hack: we need to include 'main' AArch64 target directory to grab private headers
+#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index a76715a..46915ee 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -110,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
"Is microcontroller profile ('M' series)">;
+// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
+// See ARMInstrInfo.td for details.
+def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
+ "NaCl trap">;
+
// ARM ISAs.
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index fc6ac90..58c7798 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -45,6 +46,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -182,7 +184,7 @@ namespace {
const size_t TagHeaderSize = 1 + 4;
Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitBytes(CurrentVendor);
Streamer.EmitIntValue(0, 1); // '\0'
Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
@@ -192,14 +194,14 @@ namespace {
// emit each field as its type (ULEB or String)
for (unsigned int i=0; i<Contents.size(); ++i) {
AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag, 0);
+ Streamer.EmitULEB128IntValue(item.Tag);
switch (item.Type) {
default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue, 0);
+ Streamer.EmitULEB128IntValue(item.IntValue);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper(), 0);
+ Streamer.EmitBytes(item.StringValue.upper());
Streamer.EmitIntValue(0, 1); // '\0'
break;
}
@@ -340,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ if (ARM::GPRPairRegClass.contains(Reg)) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ Reg = TRI->getSubReg(Reg, ARM::gsub_0);
+ }
O << ARMInstPrinter::getRegisterName(Reg);
break;
}
@@ -528,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
if (!MO.isReg())
return true;
- const TargetRegisterClass &RC = ARM::GPRRegClass;
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
-
- unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
- RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
-
- unsigned Reg = RC.getRegister(RegIdx);
+ unsigned Reg = MO.getReg();
+ if (!ARM::GPRPairRegClass.contains(Reg))
+ return false;
+ Reg = TRI->getSubReg(Reg, ARM::gsub_1);
O << ARMInstPrinter::getRegisterName(Reg);
return false;
}
@@ -657,7 +662,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -667,7 +672,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -685,7 +690,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- 4/*size*/, 0/*addrspace*/);
+ 4/*size*/);
}
Stubs.clear();
@@ -699,6 +704,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -1682,6 +1692,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
break;
}
+ case ARM::TRAPNaCl: {
+ //.long 0xe7fedef0 @ trap
+ uint32_t Val = 0xe7fedef0UL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
case ARM::tTRAP: {
// Non-Darwin binutils don't yet support the "trap" mnemonic.
// FIXME: Remove this special case when they do.
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index f7392fb..c945e4f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
+//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// ARM Assembly printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef ARMASMPRINTER_H
#define ARMASMPRINTER_H
@@ -54,7 +50,7 @@ public:
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "ARM Assembly Printer";
+ return "ARM Assembly / Object Emitter";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0076910..ed001ea 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2719,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
case ARM::t2STMDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
if (Subtarget.isSwift()) {
- // rdar://8402126
int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
switch (Opc) {
default: break;
@@ -4047,7 +4046,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
case ARM::VLDRS:
case ARM::FCONSTS:
case ARM::VMOVSR:
- // rdar://problem/8791586
case ARM::VMOVv8i8:
case ARM::VMOVv4i16:
case ARM::VMOVv2i32:
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d2f6a33..abdd251 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -205,7 +205,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
// First prefer the paired physreg.
- if (PairedPhys)
+ if (PairedPhys &&
+ std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
@@ -400,64 +401,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const {
return true;
}
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
-}
-
-
-void ARMBaseRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- assert(!AFI->isThumb1OnlyFunction() &&
- "This eliminateCallFramePseudoInstr does not support Thumb1!");
- bool isARM = !AFI->isThumbFunction();
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- int PIdx = Old->findFirstPredOperandIdx();
- ARMCC::CondCodes Pred = (PIdx == -1)
- ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
- unsigned PredReg = Old->getOperand(2).getReg();
- emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
- } else {
- // Note: PredReg is operand 3 for ADJCALLSTACKUP.
- unsigned PredReg = Old->getOperand(3).getReg();
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
- }
- }
- }
- MBB.erase(I);
-}
-
int64_t ARMBaseRegisterInfo::
getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
const MCInstrDesc &Desc = MI->getDesc();
@@ -717,8 +660,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- unsigned i = 0;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -727,13 +670,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned FrameReg;
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
@@ -755,18 +692,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
if (!AFI->isThumbFunction())
- Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
else {
assert(AFI->isThumb2Function());
- Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+ Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII);
}
if (Done)
return;
@@ -786,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
// Must be addrmode4/6.
- MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass);
if (!AFI->isThumbFunction())
@@ -798,6 +735,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
// Update the original instruction to use the scratch register.
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true);
}
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index aaa56a9..725033b 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -168,12 +168,9 @@ public:
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
- virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 70a25c2..4891609 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
- // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ // This block no longer needs to be aligned.
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 94c574a..29fcd40 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -146,6 +146,7 @@ class ARMFastISel : public FastISel {
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -2099,6 +2100,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
CallingConv::ID CC = F.getCallingConv();
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
@@ -2157,13 +2161,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(RetOpc)));
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc));
+ AddOptionalDefs(MIB);
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
@@ -2451,7 +2458,6 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
if (Len >= 2 && Alignment == 2)
VT = MVT::i16;
else {
- assert (Alignment == 1 && "Expected an alignment of 1!");
VT = MVT::i8;
}
}
@@ -2562,7 +2568,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(
+ Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
return true;
}
}
@@ -2877,6 +2884,80 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
return DestReg2;
}
+bool ARMFastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ default:
+ return false;
+ case CallingConv::Fast:
+ case CallingConv::C:
+ case CallingConv::ARM_AAPCS_VFP:
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ break;
+ }
+
+ // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
+ // which are passed in r0 - r3.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 4)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ default:
+ return false;
+ }
+ }
+
+
+ static const uint16_t GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ Idx = 0;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ unsigned SrcReg = GPRArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
namespace llvm {
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 39d27c4..0ca6450 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -119,13 +119,14 @@ static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- ARMCC::AL, 0, TII, MIFlags);
+ Pred, PredReg, TII, MIFlags);
}
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -1430,3 +1431,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
AFI->setLRIsSpilledForFarJump(true);
}
}
+
+
+void ARMFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = Old->getOperand(2).getReg();
+ emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
+ Pred, PredReg);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
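The adjustment amount above is rounded up to the stack alignment with (Amount + Align - 1) / Align * Align. A minimal standalone sketch of that rounding (illustrative, not part of the patch):

    #include <cassert>

    // Round Amount up to the next multiple of Align (Align > 0).
    unsigned roundUpToAlignment(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUpToAlignment(12, 8) == 16);  // outgoing-arg area padded up
      assert(roundUpToAlignment(16, 8) == 16);  // already aligned: unchanged
      assert(roundUpToAlignment(1, 4) == 4);
    }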
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index a1c2b93..efa255a 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -70,6 +70,11 @@ public:
unsigned LdrOpc, bool isVarArg, bool NoGap,
bool(*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
+
+ virtual void eliminateCallFramePseudoInstr(
+ MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 939bed7..a83f052 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/CallingConv.h"
@@ -257,6 +258,8 @@ private:
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
+ SDNode *SelectInlineAsm(SDNode *N);
+
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -2552,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case ISD::XOR: {
// Select special operations if XOR node forms integer ABS pattern
SDNode *ResNode = SelectABSOp(N);
@@ -3446,6 +3455,138 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(
+ N->getOperand(InlineAsm::Op_AsmString));
+ StringRef AsmString = StringRef(S->getSymbol());
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
+ // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
+ // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
+ // respectively. Since there is no constraint to explicitly specify a
+ // reg pair, we search for a %H operand inside the asm string. If one is
+ // found, the transformation below enforces a GPRPair reg class for "%r"
+ // for 64-bit data.
+ if (AsmString.find(":H}") == StringRef::npos)
+ return NULL;
+
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Glue = N->getOperand(NumOps-1);
+
+ // Glue node will be appended late.
+ for (unsigned i = 0; i < NumOps - 1; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag);
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2)
+ continue;
+
+ assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ GU = T1.getNode();
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if (PairedReg.getNode()) {
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
+ Flag, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return NULL;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
+ AsmNodeOperands.size());
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+
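For context, the ":H}" marker searched for above is typically what the %H operand modifier on a 64-bit "r" operand turns into in the IR asm string, and that modifier is the usual way to spell an ldrexd/strexd register pair at the source level. A hypothetical source-level example of the pattern (illustrative only, not taken from the patch; ARM-only, GCC/Clang-style inline asm):

    // 64-bit exclusive load via ldrexd: %0 prints the lower-numbered register
    // of the pair holding V, %H0 the higher-numbered one.
    static inline unsigned long long atomicLoad64(const unsigned long long *P) {
      unsigned long long V;
      __asm__ __volatile__("ldrexd %0, %H0, [%1]"
                           : "=&r"(V)
                           : "r"(P)
                           : "memory");
      return V;
    }

Without the transformation above, the two halves of V could be assigned an arbitrary, non-consecutive pair of GPRs, which ldrexd in ARM mode does not accept.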
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5b3e31f..ef96e56 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
@@ -833,21 +835,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemset = 8;
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
- maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1926,15 +1928,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -1963,10 +1959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
@@ -1979,6 +1977,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
@@ -1988,15 +1987,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- SDValue result;
+ // Update chain and glue.
+ RetOps[0] = Chain;
if (Flag.getNode())
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else // Return Void
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
- return result;
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2576,7 +2576,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
// The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
@@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+/// \return true if this is a reverse operation on a vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
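A mask qualifies when every defined element i maps to element NumElts-1-i, with -1 (undef) entries allowed anywhere. A small host-side restatement of that check (illustrative, not part of the patch):

    #include <cassert>
    #include <vector>

    // True if Mask is <NumElts-1, ..., 1, 0>, with -1 allowed for undef lanes.
    bool isReverseShuffleMask(const std::vector<int> &Mask, unsigned NumElts) {
      if (Mask.size() != NumElts)
        return false;
      for (unsigned i = 0; i != NumElts; ++i)
        if (Mask[i] >= 0 && Mask[i] != int(NumElts - 1 - i))
          return false;
      return true;
    }

    int main() {
      assert(isReverseShuffleMask({7, 6, 5, -1, 3, 2, 1, 0}, 8));  // undef lane ok
      assert(!isReverseShuffleMask({0, 1, 2, 3, 4, 5, 6, 7}, 8));  // identity
    }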
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
&VTBLMask[0], 8));
}
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: after the VREV64 the vector holds <7, ..., 0, 15, ..., 8>.
+ // The VEXT below then swaps the two double words, giving the fully reversed
+ // <15, ..., 1, 0>. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
+
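To see why VREV64 followed by VEXT #8 produces a full reversal in the v16i8 case, the byte movement can be checked with a small host-side simulation (plain C++, not NEON code; names are illustrative):

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V16 = std::array<uint8_t, 16>;

    // VREV64.8 on a Q register: reverse the 8 bytes inside each 64-bit half.
    V16 vrev64_8(const V16 &In) {
      V16 Out{};
      for (int i = 0; i < 8; ++i) {
        Out[i]     = In[7 - i];
        Out[8 + i] = In[15 - i];
      }
      return Out;
    }

    // VEXT #8 with both sources equal: take 16 bytes starting at byte 8 of
    // In:In, i.e. rotate the vector by 8 bytes.
    V16 vext8(const V16 &In) {
      V16 Out{};
      for (int i = 0; i < 16; ++i)
        Out[i] = In[(i + 8) % 16];
      return Out;
    }

    int main() {
      V16 In;
      for (int i = 0; i < 16; ++i)
        In[i] = uint8_t(i);
      V16 Res = vext8(vrev64_8(In));
      for (int i = 0; i < 16; ++i)
        assert(Res[i] == 15 - i);  // element i now holds original element 15-i
    }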
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())
@@ -5967,9 +6003,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
- unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
- unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
-
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6007,42 +6040,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// cmp storesuccess, #0
// bne- loopMBB
// fallthrough --> exitMBB
- //
- // Note that the registers are explicitly specified because there is not any
- // way to force the register allocator to allocate a register pair.
- //
- // FIXME: The hardcoded registers are not necessary for Thumb2, but we
- // need to properly enforce the restriction that the two output registers
- // for ldrexd must be different.
BB = loopMBB;
+
// Load
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned GPRPair2;
- if (IsMinMax) {
- //We need an extra double register for doing min/max.
- unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(undef)
- .addReg(vallo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2)
- .addReg(r1)
- .addReg(valhi)
- .addImm(ARM::gsub_1);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
}
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
-
+ unsigned StoreLo, StoreHi;
if (IsCmpxchg) {
// Add early exit
for (unsigned i = 0; i < 2; i++) {
@@ -6058,19 +6075,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
}
// Copy to physregs for strexd
- unsigned setlo = MI->getOperand(5).getReg();
- unsigned sethi = MI->getOperand(6).getReg();
- unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(undef)
- .addReg(setlo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(sethi)
- .addImm(ARM::gsub_1);
+ StoreLo = MI->getOperand(5).getReg();
+ StoreHi = MI->getOperand(6).getReg();
} else if (Op1) {
// Perform binary operation
unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
@@ -6082,32 +6088,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addReg(desthi).addReg(valhi))
.addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(tmpRegLo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(tmpRegHi)
- .addImm(ARM::gsub_1);
+ StoreLo = tmpRegLo;
+ StoreHi = tmpRegHi;
} else {
// Copy to physregs for strexd
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(vallo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(valhi)
- .addImm(ARM::gsub_1);
+ StoreLo = vallo;
+ StoreHi = valhi;
}
- unsigned GPRPairStore = GPRPair1;
if (IsMinMax) {
// Compare and branch to exit block.
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -6115,12 +6102,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->addSuccessor(exitMBB);
BB->addSuccessor(contBB);
BB = contBB;
- GPRPairStore = GPRPair2;
+ StoreLo = vallo;
+ StoreHi = valhi;
}
// Store
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(GPRPairStore).addReg(ptr));
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+ } else {
+ // Marshal a pair...
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(StoreLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+ .addReg(r1)
+ .addReg(StoreHi)
+ .addImm(ARM::gsub_1);
+
+ // ...and store it
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ .addReg(StorePair).addReg(ptr));
+ }
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
@@ -6329,7 +6337,16 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DispatchBB->setIsLandingPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ unsigned trap_opcode;
+ if (Subtarget->isThumb()) {
+ trap_opcode = ARM::tTRAP;
+ } else {
+ if (Subtarget->useNaClTrap())
+ trap_opcode = ARM::TRAPNaCl;
+ else
+ trap_opcode = ARM::TRAP;
+ }
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -7123,7 +7140,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LE);
+ /*IsMinMax*/ true, ARMCC::LT);
case ARM::ATOMMAX6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
@@ -7133,7 +7150,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LS);
+ /*IsMinMax*/ true, ARMCC::LO);
case ARM::ATOMUMAX6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
@@ -10343,4 +10360,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
-
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 12712c0..9409f35 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
SDNPVariadic]>;
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">,
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -1762,11 +1765,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
let Inst{3-0} = opt;
}
-// A5.4 Permanently UNDEFINED instructions.
+/*
+ * A5.4 Permanently UNDEFINED instructions.
+ *
+ * For most targets use UDF #65006, for which the OS will generate SIGTRAP.
+ * Other UDF encodings generate SIGILL.
+ *
+ * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb.
+ * Encoding A1:
+ * 1110 0111 1111 iiii iiii iiii 1111 iiii
+ * Encoding T1:
+ * 1101 1110 iiii iiii
+ * It uses the following encoding:
+ * 1110 0111 1111 1110 1101 1110 1111 0000
+ * - In ARM: UDF #60896;
+ * - In Thumb: UDF #254 followed by a branch-to-self.
+ */
+let isBarrier = 1, isTerminator = 1 in
+def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
+ Requires<[IsARM,UseNaClTrap]> {
+ let Inst = 0xe7fedef0;
+}
let isBarrier = 1, isTerminator = 1 in
def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
"trap", [(trap)]>,
- Requires<[IsARM]> {
+ Requires<[IsARM,DontUseNaClTrap]> {
let Inst = 0xe7ffdefe;
}
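As a quick cross-check of the encoding comment above, extracting the A1 immediate (bits 19-8 concatenated with bits 3-0) from the two instruction words gives exactly the UDF numbers quoted there. A standalone sketch (plain C++, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // UDF, encoding A1: imm16 is the concatenation of Inst[19:8] and Inst[3:0].
    uint16_t udfImmA1(uint32_t Inst) {
      return uint16_t((((Inst >> 8) & 0xFFF) << 4) | (Inst & 0xF));
    }

    int main() {
      assert(udfImmA1(0xe7ffdefe) == 65006);  // the default TRAP word
      assert(udfImmA1(0xe7fedef0) == 60896);  // the TRAPNaCl word
    }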
@@ -2079,6 +2103,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> {
let Inst{24-23} = 0b11;
}
+def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>;
+def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>;
+def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>;
+def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>;
+
+def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>;
+def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>;
+
// Return From Exception
class RFEI<bit wb, string asm>
: XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 697a8d2..0411ac4 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", NEONvceqz>;
@@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", NEONvclez>;
+}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", NEONvcltz>;
+}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
@@ -5740,6 +5745,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+// Fold extracting an element out of a v2i32 into a vfp register.
+def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index cf8b302..c9d709e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3481,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary,
def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary,
"srsia","\tsp, $mode", []>;
+
+def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>;
+
+def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
+def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
+
// Return From Exception is a system instruction.
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a1c21ee..98bd6c1 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1188,7 +1188,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddDeadKill = true;
}
// Never kill the base register in the first instruction.
- // <rdar://problem/11101911>
if (EvenReg == BaseReg)
EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 404634f..4191931 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel {
let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overridden by OperandCycles if the
// Itineraries are queried instead.
+ let ILPWindow = 10; // Don't reschedule small blocks to hide
+ // latency. Minimum latency requirements are already
+ // modeled strictly by reserving resources.
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 058d4c4..f4d568c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -14,7 +14,9 @@
#include "ARMSubtarget.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -43,58 +45,83 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , HasV4TOps(false)
- , HasV5TOps(false)
- , HasV5TEOps(false)
- , HasV6Ops(false)
- , HasV6T2Ops(false)
- , HasV7Ops(false)
- , HasVFPv2(false)
- , HasVFPv3(false)
- , HasVFPv4(false)
- , HasNEON(false)
- , UseNEONForSinglePrecisionFP(false)
- , UseMulOps(UseFusedMulOps)
- , SlowFPVMLx(false)
- , HasVMLxForwarding(false)
- , SlowFPBrcc(false)
- , InThumbMode(false)
- , HasThumb2(false)
- , IsMClass(false)
- , NoARM(false)
- , PostRAScheduler(false)
- , IsR9Reserved(ReserveR9)
- , UseMovt(false)
- , SupportsTailCall(false)
- , HasFP16(false)
- , HasD16(false)
- , HasHardwareDivide(false)
- , HasHardwareDivideInARM(false)
- , HasT2ExtractPack(false)
- , HasDataBarrier(false)
- , Pref32BitThumb(false)
- , AvoidCPSRPartialUpdate(false)
- , AvoidMOVsShifterOperand(false)
- , HasRAS(false)
- , HasMPExtension(false)
- , FPOnlySP(false)
- , AllowsUnalignedMem(false)
- , Thumb2DSP(false)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
- // Determine default and user specified characteristics
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+void ARMSubtarget::initializeEnvironment() {
+ HasV4TOps = false;
+ HasV5TOps = false;
+ HasV5TEOps = false;
+ HasV6Ops = false;
+ HasV6T2Ops = false;
+ HasV7Ops = false;
+ HasVFPv2 = false;
+ HasVFPv3 = false;
+ HasVFPv4 = false;
+ HasNEON = false;
+ UseNEONForSinglePrecisionFP = false;
+ UseMulOps = UseFusedMulOps;
+ SlowFPVMLx = false;
+ HasVMLxForwarding = false;
+ SlowFPBrcc = false;
+ InThumbMode = false;
+ HasThumb2 = false;
+ IsMClass = false;
+ NoARM = false;
+ PostRAScheduler = false;
+ IsR9Reserved = ReserveR9;
+ UseMovt = false;
+ SupportsTailCall = false;
+ HasFP16 = false;
+ HasD16 = false;
+ HasHardwareDivide = false;
+ HasHardwareDivideInARM = false;
+ HasT2ExtractPack = false;
+ HasDataBarrier = false;
+ Pref32BitThumb = false;
+ AvoidCPSRPartialUpdate = false;
+ AvoidMOVsShifterOperand = false;
+ HasRAS = false;
+ HasMPExtension = false;
+ FPOnlySP = false;
+ AllowsUnalignedMem = false;
+ Thumb2DSP = false;
+ UseNaClTrap = false;
+}
+
+void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty())
CPUString = "generic";
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
// based on the architecture version.
- std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString);
+ std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(),
+ CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS;
+ ArchFS = ArchFS + "," + FS.str();
else
ArchFS = FS;
}
@@ -111,7 +138,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
- if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
+ if ((TargetTriple.getTriple().find("eabi") != std::string::npos) ||
+ (isTargetIOS() && isMClass()))
// FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
// Darwin-EABI conforms to AACPS but not the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 64878cd..8ce22e1 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -156,6 +156,9 @@ protected:
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
+ /// NaCl TRAP instruction is generated instead of the regular TRAP.
+ bool UseNaClTrap;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -199,6 +202,12 @@ protected:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// \brief Reset the features for the ARM target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
void computeIssueWidth();
bool hasV4TOps() const { return HasV4TOps; }
@@ -241,6 +250,7 @@ protected:
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool useNaClTrap() const { return UseNaClTrap; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index be6bec7..d4caf5c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -46,6 +46,10 @@ public:
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const ARMTargetLowering *getTargetLowering() const {
+ // Implemented by derived classes
+ llvm_unreachable("getTargetLowering not implemented");
+ }
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 03a23be..01c04b4 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
@@ -34,18 +35,20 @@ namespace {
class ARMTTI : public ImmutablePass, public TargetTransformInfo {
const ARMBaseTargetMachine *TM;
const ARMSubtarget *ST;
+ const ARMTargetLowering *TLI;
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- ARMTTI() : ImmutablePass(ID), TM(0), ST(0) {
+ ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
llvm_unreachable("This pass cannot be directly constructed");
}
ARMTTI(const ARMBaseTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) {
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
initializeARMTTIPass(*PassRegistry::getPassRegistry());
}
@@ -77,6 +80,52 @@ public:
virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
/// @}
+
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 16;
+ return 0;
+ }
+
+ if (ST->isThumb1Only())
+ return 8;
+ return 16;
+ }
+
+ unsigned getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasNEON())
+ return 128;
+ return 0;
+ }
+
+ return 32;
+ }
+
+ unsigned getMaximumUnrollFactor() const {
+ // These are out of order CPUs:
+ if (ST->isCortexA15() || ST->isSwift())
+ return 2;
+ return 1;
+ }
+
+ unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const;
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
+
+ unsigned getAddressComputationCost(Type *Val) const;
+ /// @}
};
} // end anonymous namespace
@@ -122,3 +171,200 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
}
return 2;
}
+
+unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ // Some arithmetic, load and store operations have specific instructions
+ // to cast up/down their types automatically at no extra cost.
+ // TODO: Get these tables to know at least what the related operations are.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+
+ // Vector float <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+
+ // Vector double <-> i32 conversions.
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ };
+
+ if (SrcTy.isVector() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
+ array_lengthof(NEONVectorConversionTbl),
+ ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorConversionTbl[Idx].Cost;
+ }
+
+ // Scalar float to integer conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
+ { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
+ { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
+ };
+ if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
+ array_lengthof(NEONFloatConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONFloatConversionTbl[Idx].Cost;
+ }
+
+
+ // Scalar integer to float conversions.
+ static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
+ { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
+ { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
+ { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
+ };
+
+ if (SrcTy.isInteger() && ST->hasNEON()) {
+ int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
+ array_lengthof(NEONIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONIntegerConversionTbl[Idx].Cost;
+ }
+
+ // Scalar integer conversion costs.
+ static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ // i16 -> i64 requires two dependent operations.
+ { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
+
+ // Truncates on i64 are assumed to be free.
+ { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
+ { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
+ };
+
+ if (SrcTy.isInteger()) {
+ int Idx =
+ ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
+ array_lengthof(ARMIntegerConversionTbl),
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ARMIntegerConversionTbl[Idx].Cost;
+ }
+
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) const {
+ // Penalize inserting into an D-subregister. We end up with a three times
+ // lower estimated throughput on swift.
+ if (ST->isSwift() &&
+ Opcode == Instruction::InsertElement &&
+ ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return 3;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
+}
+
+unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ // On NEON a vector select gets lowered to vbsl.
+ if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ return LT.first;
+ }
+
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ // We only handle costs of reverse shuffles for now.
+ if (Kind != SK_Reverse)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ // A reverse shuffle costs one instruction if we are shuffling within a double
+ // word (vrev) or two if we shuffle a quad word (vrev, vext).
+ { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
+
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
+ };
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
+ ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+}
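All of the cost hooks above follow the same shape: legalize the type, look the (operation, legalized MVT) pair up in a small static table, and fall back to the generic TargetTransformInfo cost when there is no entry. For the reverse-shuffle table, for example, a legal v16i8 legalizes in one step and the entry charges 2, so the reported cost is 1 * 2 = 2. A minimal standalone sketch of that lookup pattern, with stand-in keys rather than the real CostTable.h helpers:

    #include <cstddef>

    struct CostTblEntry {
      int Op;        // operation key (stands in for the ISD opcode)
      int Ty;        // legalized type key (stands in for MVT)
      unsigned Cost;
    };

    // Linear scan; returns the matching index or -1 so the caller can fall
    // back to a generic cost, mirroring the table lookups above.
    int costTableLookup(const CostTblEntry *Tbl, size_t Len, int Op, int Ty) {
      for (size_t i = 0; i != Len; ++i)
        if (Tbl[i].Op == Op && Tbl[i].Ty == Ty)
          return int(i);
      return -1;
    }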
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ad37a21..6c678fd 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -18,7 +18,9 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -28,6 +30,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
@@ -250,6 +253,13 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
+
+ // Set ELF header flags.
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
+ MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
// Implementation of the MCTargetAsmParser interface:
@@ -259,6 +269,7 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirective(AsmToken DirectiveID);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
unsigned checkTargetMatchPredicate(MCInst &Inst);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -271,7 +282,7 @@ public:
namespace {
/// ARMOperand - Instances of this class represent a parsed ARM machine
-/// instruction.
+/// operand.
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
k_CondCode,
@@ -2557,7 +2568,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
- if (getParser().ParseExpression(ShiftExpr, EndLoc)) {
+ if (getParser().parseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
}
@@ -2640,7 +2651,7 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE)
@@ -2785,7 +2796,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *Expr;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(Expr)) {
+ if (getParser().parseExpression(Expr)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -2998,7 +3009,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LaneIndex)) {
+ if (getParser().parseExpression(LaneIndex)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3316,7 +3327,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
- if (getParser().ParseExpression(MemBarrierID)) {
+ if (getParser().parseExpression(MemBarrierID)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3532,7 +3543,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
Error(Loc, "illegal expression");
return MatchOperand_ParseFail;
}
@@ -3612,7 +3623,7 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *ShiftAmount;
SMLoc EndLoc;
- if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
Error(ExLoc, "malformed shift expression");
return MatchOperand_ParseFail;
}
@@ -3673,7 +3684,7 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *ShiftAmount;
SMLoc EndLoc;
- if (getParser().ParseExpression(ShiftAmount, EndLoc)) {
+ if (getParser().parseExpression(ShiftAmount, EndLoc)) {
Error(ExLoc, "malformed rotate expression");
return MatchOperand_ParseFail;
}
@@ -3710,7 +3721,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *LSBExpr;
SMLoc E = Parser.getTok().getLoc();
- if (getParser().ParseExpression(LSBExpr)) {
+ if (getParser().parseExpression(LSBExpr)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3743,7 +3754,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const MCExpr *WidthExpr;
SMLoc EndLoc;
- if (getParser().ParseExpression(WidthExpr, EndLoc)) {
+ if (getParser().parseExpression(WidthExpr, EndLoc)) {
Error(E, "malformed immediate expression");
return MatchOperand_ParseFail;
}
@@ -3839,7 +3850,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *Offset;
SMLoc E;
- if (getParser().ParseExpression(Offset, E))
+ if (getParser().parseExpression(Offset, E))
return MatchOperand_ParseFail;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
if (!CE) {
@@ -4226,9 +4237,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (BaseRegNum == -1)
return Error(BaseRegTok.getLoc(), "register expected");
- // The next token must either be a comma or a closing bracket.
+ // The next token must either be a comma, a colon or a closing bracket.
const AsmToken &Tok = Parser.getTok();
- if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) &&
+ !Tok.is(AsmToken::RBrac))
return Error(Tok.getLoc(), "malformed memory operand");
if (Tok.is(AsmToken::RBrac)) {
@@ -4248,8 +4260,11 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return false;
}
- assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!");
- Parser.Lex(); // Eat the comma.
+ assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) &&
+ "Lost colon or comma in memory operand?!");
+ if (Tok.is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ }
// If we have a ':', it's an alignment specifier.
if (Parser.getTok().is(AsmToken::Colon)) {
@@ -4257,7 +4272,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4313,7 +4328,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
bool isNegative = getParser().getTok().is(AsmToken::Minus);
const MCExpr *Offset;
- if (getParser().ParseExpression(Offset))
+ if (getParser().parseExpression(Offset))
return true;
// The expression has to be a constant. Memory references with relocations
@@ -4432,7 +4447,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Parser.Lex(); // Eat hash token.
const MCExpr *Expr;
- if (getParser().ParseExpression(Expr))
+ if (getParser().parseExpression(Expr))
return true;
// Range check the immediate.
// lsl, ror: 0 <= imm <= 31
@@ -4461,7 +4476,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Anything that can accept a floating point constant as an operand
- // needs to go through here, as the regular ParseExpression is
+ // needs to go through here, as the regular parseExpression is
// integer only.
//
// This routine still creates a generic Immediate operand, containing
@@ -4581,7 +4596,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
S = Parser.getTok().getLoc();
- if (getParser().ParseExpression(IdVal))
+ if (getParser().parseExpression(IdVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
@@ -4600,7 +4615,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (Parser.getTok().isNot(AsmToken::Colon)) {
bool isNegative = Parser.getTok().is(AsmToken::Minus);
const MCExpr *ImmVal;
- if (getParser().ParseExpression(ImmVal))
+ if (getParser().parseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
if (CE) {
@@ -4610,6 +4625,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
}
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
+
+ // There can be a trailing '!' on operands that we want as a separate
+ // '!' Token operand. Handle that here. For example, the compatibility
+ // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
+ if (Parser.getTok().is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
return false;
}
// w/ a ':' after the '#', it's just like a plain ':'.
@@ -4624,7 +4648,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return true;
const MCExpr *SubExprVal;
- if (getParser().ParseExpression(SubExprVal))
+ if (getParser().parseExpression(SubExprVal))
return true;
const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
@@ -4997,7 +5021,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "conditional execution not supported in Thumb1");
}
@@ -5011,14 +5035,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Mnemonic == "it") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
if (ITMask.size() > 3) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "too many conditions on IT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
char pos = ITMask[i - 1];
if (pos != 't' && pos != 'e') {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
@@ -5044,14 +5068,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
// If we had a predication code on an instruction that can't do that, issue an
// error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' is not predicable, but condition code specified");
}
@@ -5100,7 +5124,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -5109,7 +5133,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Parse and remember the operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
@@ -5117,7 +5141,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -5148,53 +5172,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
delete Op;
}
- // The vector-compare-to-zero instructions have a literal token "#0" at
- // the end that comes to here as an immediate operand. Convert it to a
- // token to play nicely with the matcher.
- if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" ||
- Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // VCMP{E} does the same thing, but with a different operand count.
- if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[4])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
- Operands.erase(Operands.begin() + 4);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
- // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here. Take care not to convert those
- // that should hit the Thumb2 encoding.
- if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0 &&
- (isThumbOne() ||
- // The cc_out operand matches the IT block.
- ((inITBlock() != CarrySetting) &&
- // Neither register operand is a high register.
- (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
- Operands.erase(Operands.begin() + 5);
- Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
- delete Op;
- }
- }
-
// Adjust operands of ldrexd/strexd to MCK_GPRPair.
// ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
// a single GPRPair reg operand is used in the .td file to replace the two
@@ -7646,10 +7623,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -7793,13 +7770,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(SRegLoc, "register name expected");
}
// Shouldn't be anything else.
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Parser.getTok().getLoc(),
"unexpected input in .req directive.");
}
@@ -7817,7 +7794,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(L, "unexpected input in .unreq directive.");
}
RegisterReqs.erase(Parser.getTok().getIdentifier());
@@ -7847,3 +7824,21 @@ extern "C" void LLVMInitializeARMAsmParser() {
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ if (Kind == MCK__35_0 && Op->isImm()) {
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == 0)
+ return Match_Success;
+ }
+ return Match_InvalidOperand;
+}
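
As a side note on the new validateTargetOperandClass hook: for the MCK__35_0 ("#0") token class it only accepts an operand that folds to the constant zero. A minimal standalone sketch of that decision, using a hypothetical FakeImmOperand stand-in rather than the real ARMOperand/MCConstantExpr types:

#include <cassert>

enum MatchResult { Match_Success, Match_InvalidOperand };

// Hypothetical stand-in for a parsed immediate operand.
struct FakeImmOperand {
  bool IsImm;        // operand parsed as an immediate
  long long Value;   // only meaningful when IsImm is true
};

// Mirrors the MCK__35_0 case above: accept only a constant immediate
// whose value is exactly zero.
static MatchResult validateHashZero(const FakeImmOperand &Op) {
  if (Op.IsImm && Op.Value == 0)
    return Match_Success;
  return Match_InvalidOperand;
}

int main() {
  FakeImmOperand Zero = { true, 0 };   // "#0" from an InstAlias
  FakeImmOperand Four = { true, 4 };   // "#4" should be rejected
  assert(validateHashZero(Zero) == Match_Success);
  assert(validateHashZero(Four) == Match_InvalidOperand);
  return 0;
}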
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index d48b37e..2afb20d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -627,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- // FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << (MO2.getImm() << 3);
+ O << ":" << (MO2.getImm() << 3);
}
O << "]" << markup(">");
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1f1b334..e66e985 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -663,25 +664,20 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- if (TheTriple.getArchName() == "armv4t" ||
- TheTriple.getArchName() == "thumbv4t")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T);
- else if (TheTriple.getArchName() == "armv5e" ||
- TheTriple.getArchName() == "thumbv5e")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ);
- else if (TheTriple.getArchName() == "armv6" ||
- TheTriple.getArchName() == "thumbv6")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6);
- else if (TheTriple.getArchName() == "armv7f" ||
- TheTriple.getArchName() == "thumbv7f")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F);
- else if (TheTriple.getArchName() == "armv7k" ||
- TheTriple.getArchName() == "thumbv7k")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K);
- else if (TheTriple.getArchName() == "armv7s" ||
- TheTriple.getArchName() == "thumbv7s")
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S);
- return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7);
+ object::mach::CPUSubtypeARM CS =
+ StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
+ .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
+ .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
+ .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
+ .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
+ .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
+ .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
+ .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
+ .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
+ .Default(object::mach::CSARM_V7);
+
+ return new DarwinARMAsmBackend(T, TT, CS);
}
if (TheTriple.isOSWindows())
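
The StringSwitch rewrite above is behavior-preserving: each arm/thumb arch-name pair maps to one CPU subtype, and anything unrecognized falls through to the v7 default. A minimal sketch of the same idiom against a made-up enum (not the real object::mach::CPUSubtypeARM values):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum FakeSubtype { V4T, V6, V7S, V7_DEFAULT };

static FakeSubtype classify(llvm::StringRef ArchName) {
  // Unlisted names (e.g. "armv7") hit the Default case, matching the
  // old trailing "return ... CSARM_V7" above.
  return llvm::StringSwitch<FakeSubtype>(ArchName)
      .Cases("armv4t", "thumbv4t", V4T)
      .Cases("armv6", "thumbv6", V6)
      .Cases("armv7s", "thumbv7s", V7S)
      .Default(V7_DEFAULT);
}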
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 9193e40..f98bbd2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -37,7 +37,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
ARMELFObjectWriter::~ARMELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Triple.
-unsigned ARMELFObjectWriter::getEFlags() const {
- return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
-}
-
// In ARM, _MergedGlobals and other most symbols get emitted directly.
// I.e. not as an offset to a section symbol.
// This code is an approximation of what ARM/gcc does.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 39ded8f..418971d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMUnwindOp.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -53,14 +54,27 @@ namespace {
/// by MachO. Beware!
class ARMELFStreamer : public MCELFStreamer {
public:
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(Context, TAB, OS, Emitter),
- IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
- }
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool IsThumb)
+ : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
+ FnStart(0), Personality(0), CantUnwind(false) {}
~ARMELFStreamer() {}
+ // ARM exception handling directives
+ virtual void EmitFnStart();
+ virtual void EmitFnEnd();
+ virtual void EmitCantUnwind();
+ virtual void EmitPersonality(const MCSymbol *Per);
+ virtual void EmitHandlerData();
+ virtual void EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset = 0);
+ virtual void EmitPad(int64_t Offset);
+ virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
virtual void ChangeSection(const MCSection *Section) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
@@ -119,6 +133,10 @@ public:
}
}
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_ARMELFStreamer;
+ }
+
private:
enum ElfMappingSymbol {
EMS_None,
@@ -172,6 +190,15 @@ private:
SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
}
+ // Helper functions for ARM exception handling directives
+ void Reset();
+
+ void EmitPersonalityFixup(StringRef Name);
+
+ void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ SectionKind Kind, const MCSymbol &Fn);
+ void SwitchToExTabSection(const MCSymbol &FnStart);
+ void SwitchToExIdxSection(const MCSymbol &FnStart);
bool IsThumb;
int64_t MappingSymbolCounter;
@@ -179,10 +206,200 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
- /// @}
+ // ARM Exception Handling Frame Information
+ MCSymbol *ExTab;
+ MCSymbol *FnStart;
+ const MCSymbol *Personality;
+ bool CantUnwind;
};
}
+inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+ unsigned Type,
+ unsigned Flags,
+ SectionKind Kind,
+ const MCSymbol &Fn) {
+ const MCSectionELF &FnSection =
+ static_cast<const MCSectionELF &>(Fn.getSection());
+
+ // Create the name for new section
+ StringRef FnSecName(FnSection.getSectionName());
+ SmallString<128> EHSecName(Prefix);
+ if (FnSecName != ".text") {
+ EHSecName += FnSecName;
+ }
+
+ // Get .ARM.extab or .ARM.exidx section
+ const MCSectionELF *EHSection = NULL;
+ if (const MCSymbol *Group = FnSection.getGroup()) {
+ EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
+ FnSection.getEntrySize(), Group->getName());
+ } else {
+ EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
+ }
+ assert(EHSection);
+
+ // Switch to .ARM.extab or .ARM.exidx section
+ SwitchSection(EHSection);
+ EmitCodeAlignment(4, 0);
+}
+
+inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.extab",
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
+ SwitchToEHSection(".ARM.exidx",
+ ELF::SHT_ARM_EXIDX,
+ ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
+ SectionKind::getDataRel(),
+ FnStart);
+}
+
+void ARMELFStreamer::Reset() {
+ ExTab = NULL;
+ FnStart = NULL;
+ Personality = NULL;
+ CantUnwind = false;
+}
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(PersonalitySym,
+ MCSymbolRefExpr::VK_ARM_NONE,
+ getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(
+ MCFixup::Create(DF->getContents().size(), PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
+}
+
+void ARMELFStreamer::EmitFnStart() {
+ assert(FnStart == 0);
+ FnStart = getContext().CreateTempSymbol();
+ EmitLabel(FnStart);
+}
+
+void ARMELFStreamer::EmitFnEnd() {
+  assert(FnStart && ".fnstart must precede .fnend");
+
+ // Emit unwind opcodes if there is no .handlerdata directive
+ int PersonalityIndex = -1;
+ if (!ExTab && !CantUnwind) {
+ // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ PersonalityIndex = 1;
+
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+ }
+
+ // Emit the exception index table entry
+ SwitchToExIdxSection(*FnStart);
+
+ if (PersonalityIndex == 1)
+ EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+
+ const MCSymbolRefExpr *FnStartRef =
+ MCSymbolRefExpr::Create(FnStart,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(FnStartRef, 4, 0);
+
+ if (CantUnwind) {
+ EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
+ } else {
+ const MCSymbolRefExpr *ExTabEntryRef =
+ MCSymbolRefExpr::Create(ExTab,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+ EmitValue(ExTabEntryRef, 4, 0);
+ }
+
+ // Clean exception handling frame information
+ Reset();
+}
+
+void ARMELFStreamer::EmitCantUnwind() {
+ CantUnwind = true;
+}
+
+void ARMELFStreamer::EmitHandlerData() {
+ SwitchToExTabSection(*FnStart);
+
+ // Create .ARM.extab label for offset in .ARM.exidx
+ assert(!ExTab);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+
+ // Emit Personality
+  assert(Personality && ".personality directive must precede .handlerdata");
+
+ const MCSymbolRefExpr *PersonalityRef =
+ MCSymbolRefExpr::Create(Personality,
+ MCSymbolRefExpr::VK_ARM_PREL31,
+ getContext());
+
+ EmitValue(PersonalityRef, 4, 0);
+
+ // Emit unwind opcodes
+ uint32_t Entry = 0;
+ uint32_t NumExtraEntryWords = 0;
+
+ // TODO: This should be generated according to .save, .vsave, .setfp
+ // directives. Currently, we are simply generating FINISH opcode.
+ Entry |= NumExtraEntryWords << 24;
+ Entry |= UNWIND_OPCODE_FINISH << 16;
+ Entry |= UNWIND_OPCODE_FINISH << 8;
+ Entry |= UNWIND_OPCODE_FINISH;
+
+ EmitIntValue(Entry, 4, 0);
+}
+
+void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+ Personality = Per;
+}
+
+void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
+ unsigned NewSpReg,
+ int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitPad(int64_t Offset) {
+ // TODO: Not implemented
+}
+
+void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool IsVector) {
+ // TODO: Not implemented
+}
+
namespace llvm {
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
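
For reference, the 32-bit compact table entry that EmitFnEnd() assembles packs its fields as: bits 31-24 hold the extra-word count (0 here), bits 23-16 hold EHT_COMPACT | personality index, and bits 15-0 hold two unwind opcode bytes (both FINISH for now). A standalone sketch that reproduces just that packing, with the constants copied from ARMUnwindOp.h:

#include <cassert>
#include <cstdint>

static const uint32_t EHT_COMPACT = 0x80;
static const uint32_t UNWIND_OPCODE_FINISH = 0xb0;

static uint32_t compactEntry(uint32_t PersonalityIndex) {
  uint32_t NumExtraEntryWords = 0;
  uint32_t Entry = 0;
  Entry |= NumExtraEntryWords << 24;                 // no extra entry words
  Entry |= (EHT_COMPACT | PersonalityIndex) << 16;   // compact model + index
  Entry |= UNWIND_OPCODE_FINISH << 8;                // opcode byte 1
  Entry |= UNWIND_OPCODE_FINISH;                     // opcode byte 0
  return Entry;
}

int main() {
  // Personality index 1 (__aeabi_unwind_cpp_pr1), no real unwind actions yet.
  assert(compactEntry(1) == 0x0081b0b0);
  return 0;
}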
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index b404e6c..cd4067a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -64,6 +64,9 @@ public:
return getSubExpr()->FindAssociatedSection();
}
+ // There are no TLS ARMMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index f4958f3..f09fb5a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMMCTargetDesc.h"
#include "ARMBaseInfo.h"
#include "ARMELFStreamer.h"
#include "ARMMCAsmInfo.h"
+#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -37,6 +38,8 @@
using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
+ Triple triple(TT);
+
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
unsigned Len = TT.size();
@@ -119,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
ARMArchFeature += ",+thumb-mode";
}
+ if (triple.isOSNaCl()) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+nacl-trap";
+ else
+ ARMArchFeature += ",+nacl-trap";
+ }
+
return ARMArchFeature;
}
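
The NaCl hunk above only appends to the comma-separated subtarget feature string. A trivial standalone sketch of that append, with a hypothetical pre-existing feature list:

#include <cassert>
#include <string>

static std::string appendNaClTrap(std::string Features) {
  if (Features.empty())
    Features = "+nacl-trap";
  else
    Features += ",+nacl-trap";
  return Features;
}

int main() {
  assert(appendNaClTrap("") == "+nacl-trap");
  assert(appendNaClTrap("+v7,+neon") == "+v7,+neon,+nacl-trap"); // hypothetical input
  return 0;
}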
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
new file mode 100644
index 0000000..dad5576
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -0,0 +1,112 @@
+//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the constants for the ARM unwind opcodes and exception
+// handling table entry kinds.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_H
+#define ARM_UNWIND_OP_H
+
+namespace llvm {
+
+ /// ARM exception handling table entry kinds
+ enum ARMEHTEntryKind {
+ EHT_GENERIC = 0x00,
+ EHT_COMPACT = 0x80
+ };
+
+ enum {
+    /// Special entry for functions that never unwind
+ EXIDX_CANTUNWIND = 0x1
+ };
+
+ /// ARM-defined frame unwinding opcodes
+ enum ARMUnwindOpcodes {
+ // Format: 00xxxxxx
+ // Purpose: vsp = vsp + ((x << 2) + 4)
+ UNWIND_OPCODE_INC_VSP = 0x00,
+
+ // Format: 01xxxxxx
+ // Purpose: vsp = vsp - ((x << 2) + 4)
+ UNWIND_OPCODE_DEC_VSP = 0x40,
+
+ // Format: 10000000 00000000
+ // Purpose: refuse to unwind
+ UNWIND_OPCODE_REFUSE = 0x8000,
+
+ // Format: 1000xxxx xxxxxxxx
+ // Purpose: pop r[15:12], r[11:4]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000,
+
+ // Format: 1001xxxx
+ // Purpose: vsp = r[x]
+ // Constraint: x != 13 && x != 15
+ UNWIND_OPCODE_SET_VSP = 0x90,
+
+ // Format: 10100xxx
+ // Purpose: pop r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0,
+
+ // Format: 10101xxx
+ // Purpose: pop r14, r[(4+x):4]
+ UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8,
+
+ // Format: 10110000
+ // Purpose: finish
+ UNWIND_OPCODE_FINISH = 0xb0,
+
+ // Format: 10110001 0000xxxx
+ // Purpose: pop r[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_REG_MASK = 0xb100,
+
+ // Format: 10110010 x(uleb128)
+ // Purpose: vsp = vsp + ((x << 2) + 0x204)
+ UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2,
+
+ // Format: 10110011 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300,
+
+ // Format: 10111xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8,
+
+ // Format: 11000xxx
+ // Purpose: pop wR[(10+x):10]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0,
+
+ // Format: 11000110 xxxxyyyy
+ // Purpose: pop wR[(x+y):x]
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600,
+
+ // Format: 11000111 0000xxxx
+ // Purpose: pop wCGR[3:0]
+ // Constraint: x != 0
+ UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700,
+
+ // Format: 11001000 xxxxyyyy
+ // Purpose: pop d[(16+x+y):(16+x)]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800,
+
+ // Format: 11001001 xxxxyyyy
+ // Purpose: pop d[(x+y):x]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900,
+
+ // Format: 11010xxx
+ // Purpose: pop d[(8+x):8]
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
+ };
+
+}
+
+#endif // ARM_UNWIND_OP_H
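
To make the opcode formats above concrete, here is a standalone sketch encoding two of the single-byte opcodes; the helper names are invented, and only the bit layouts come from the comments in this header:

#include <cassert>
#include <cstdint>

static const uint8_t UNWIND_OPCODE_INC_VSP = 0x00;              // 00xxxxxx
static const uint8_t UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8; // 10101xxx

// vsp = vsp + ((x << 2) + 4), so an increment of Bytes (a multiple of 4,
// 4 <= Bytes <= 256) encodes x = (Bytes - 4) >> 2.
static uint8_t encodeIncVSP(unsigned Bytes) {
  return UNWIND_OPCODE_INC_VSP | ((Bytes - 4) >> 2);
}

// pop r14, r[(4+x):4], i.e. pop {r4..rN, lr}, valid for N in [4, 11].
static uint8_t encodePopR4ToRnAndLR(unsigned N) {
  return UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | (N - 4);
}

int main() {
  assert(encodeIncVSP(16) == 0x03);        // vsp = vsp + 16
  assert(encodePopR4ToRnAndLR(7) == 0xab); // pop {r4-r7, lr}
  return 0;
}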
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 123ada6..2c3388c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB,
MRI, MIFlags);
}
+
+void Thumb1FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ } else {
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -124,14 +159,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ bool HasFP = hasFP(MF);
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
// Adjust FP so it point to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
+ if (HasFP) {
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
.setMIFlags(MachineInstr::FrameSetup));
@@ -146,7 +184,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
MachineInstr::FrameSetup);
- if (STI.isTargetELF() && hasFP(MF))
+ if (STI.isTargetELF() && HasFP)
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
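
The eliminateCallFramePseudoInstr() body moved here keeps the same rounding rule for the call-frame adjustment: the outgoing-argument size is rounded up to the stack alignment. A one-function standalone sketch with example values (the 8-byte alignment is just an assumption for illustration):

#include <cassert>

static unsigned roundUpToAlignment(unsigned Amount, unsigned Align) {
  return (Amount + Align - 1) / Align * Align;
}

int main() {
  assert(roundUpToAlignment(20, 8) == 24); // 20 bytes of arguments, 8-byte stack alignment
  assert(roundUpToAlignment(16, 8) == 16); // already-aligned amounts are unchanged
  return 0;
}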
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index bcfc516..5a300af 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,10 @@ public:
const TargetRegisterInfo *TRI) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 57cc7d8..609d502 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
- MRI);
-}
-
-void Thumb1RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub, sp, sp, amount
- // ADJCALLSTACKUP -> add, sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc dl = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
- } else {
- assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, *this, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
/// emitThumbConstant - Emit a series of instructions to materialize a
/// constant.
static void emitThumbConstant(MachineBasicBlock &MBB,
@@ -593,9 +552,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
void
Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
unsigned VReg = 0;
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -603,13 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
@@ -646,15 +600,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
assert(AFI->isThumbFunction() &&
"This eliminateFrameIndex only supports Thumb1!");
- if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
// If we get here, the immediate doesn't fit into the instruction. We folded
@@ -687,11 +641,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
- MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
bool UseRR = false;
@@ -708,11 +663,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
*this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
- MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
+ MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true);
if (UseRR)
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false,
+ false);
} else {
llvm_unreachable("Unexpected opcode!");
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index f2e4b08..ebbab36 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -43,11 +43,6 @@ public:
unsigned PredReg = 0,
unsigned MIFlags = MachineInstr::NoFlags) const;
- /// Code Generation virtual methods...
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
// rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
// however much remains to be handled. Return 'true' if no further
// work is required.
@@ -62,7 +57,8 @@ public:
const TargetRegisterClass *RC,
unsigned Reg) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
};
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index f468861..604abf9 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -470,18 +470,19 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
nl(Out);
if (!PAL.isEmpty()) {
Out << '{'; in(); nl(Out);
- Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
- Out << "AttributeWithIndex PAWI;"; nl(Out);
+ Out << "SmallVector<AttributeSet, 4> Attrs;"; nl(Out);
+ Out << "AttributeSet PAS;"; in(); nl(Out);
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
- unsigned index = PAL.getSlot(i).Index;
- AttrBuilder attrs(PAL.getSlot(i).Attrs);
- Out << "PAWI.Index = " << index << "U;\n";
- Out << " {\n AttrBuilder B;\n";
-
-#define HANDLE_ATTR(X) \
- if (attrs.contains(Attribute::X)) \
- Out << " B.addAttribute(Attribute::" #X ");\n"; \
- attrs.removeAttribute(Attribute::X);
+ unsigned index = PAL.getSlotIndex(i);
+ AttrBuilder attrs(PAL.getSlotAttributes(i), index);
+ Out << "{"; in(); nl(Out);
+ Out << "AttrBuilder B;"; nl(Out);
+
+#define HANDLE_ATTR(X) \
+ if (attrs.contains(Attribute::X)) { \
+ Out << "B.addAttribute(Attribute::" #X ");"; nl(Out); \
+ attrs.removeAttribute(Attribute::X); \
+ }
HANDLE_ATTR(SExt);
HANDLE_ATTR(ZExt);
@@ -499,6 +500,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(StackProtectStrong);
HANDLE_ATTR(NoCapture);
HANDLE_ATTR(NoRedZone);
HANDLE_ATTR(NoImplicitFloat);
@@ -509,14 +511,23 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(NonLazyBind);
HANDLE_ATTR(MinSize);
#undef HANDLE_ATTR
- if (attrs.contains(Attribute::StackAlignment))
- Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n";
- attrs.removeAttribute(Attribute::StackAlignment);
+
+ if (attrs.contains(Attribute::StackAlignment)) {
+ Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment()<<')';
+ nl(Out);
+ attrs.removeAttribute(Attribute::StackAlignment);
+ }
+
assert(!attrs.hasAttributes() && "Unhandled attribute!");
- Out << " PAWI.Attrs = Attribute::get(mod->getContext(), B);\n }";
- nl(Out);
- Out << "Attrs.push_back(PAWI);";
+ Out << "PAS = AttributeSet::get(mod->getContext(), ";
+ if (index == ~0U)
+ Out << "~0U,";
+ else
+ Out << index << "U,";
+ Out << " B);"; out(); nl(Out);
+ Out << "}"; out(); nl(Out);
nl(Out);
+ Out << "Attrs.push_back(PAS);"; nl(Out);
}
Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);";
nl(Out);
@@ -1888,23 +1899,24 @@ void CppWriter::printModuleBody() {
void CppWriter::printProgram(const std::string& fname,
const std::string& mName) {
- Out << "#include <llvm/LLVMContext.h>\n";
- Out << "#include <llvm/Module.h>\n";
- Out << "#include <llvm/DerivedTypes.h>\n";
- Out << "#include <llvm/Constants.h>\n";
- Out << "#include <llvm/GlobalVariable.h>\n";
- Out << "#include <llvm/Function.h>\n";
- Out << "#include <llvm/CallingConv.h>\n";
- Out << "#include <llvm/BasicBlock.h>\n";
- Out << "#include <llvm/Instructions.h>\n";
- Out << "#include <llvm/InlineAsm.h>\n";
- Out << "#include <llvm/Support/FormattedStream.h>\n";
- Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <llvm/Pass.h>\n";
Out << "#include <llvm/PassManager.h>\n";
+
Out << "#include <llvm/ADT/SmallVector.h>\n";
Out << "#include <llvm/Analysis/Verifier.h>\n";
Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <llvm/IR/BasicBlock.h>\n";
+ Out << "#include <llvm/IR/CallingConv.h>\n";
+ Out << "#include <llvm/IR/Constants.h>\n";
+ Out << "#include <llvm/IR/DerivedTypes.h>\n";
+ Out << "#include <llvm/IR/Function.h>\n";
+ Out << "#include <llvm/IR/GlobalVariable.h>\n";
+ Out << "#include <llvm/IR/InlineAsm.h>\n";
+ Out << "#include <llvm/IR/Instructions.h>\n";
+ Out << "#include <llvm/IR/LLVMContext.h>\n";
+ Out << "#include <llvm/IR/Module.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
Out << "#include <algorithm>\n";
Out << "using namespace llvm;\n\n";
Out << "Module* " << fname << "();\n\n";
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index aee43ba..b5b887e 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_target(HexagonCodeGen
HexagonExpandPredSpillCode.cpp
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
+ HexagonFixupHwLoops.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 45f857b..dfbefc8 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -21,14 +21,16 @@
namespace llvm {
class FunctionPass;
+ class ModulePass;
class TargetMachine;
class MachineInstr;
- class MCInst;
+ class HexagonMCInst;
class HexagonAsmPrinter;
class HexagonTargetMachine;
class raw_ostream;
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
@@ -53,7 +55,7 @@ namespace llvm {
TargetAsmBackend *createHexagonAsmBackend(const Target &,
const std::string &);
*/
- void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ void HexagonLowerToMC(const MachineInstr *MI, HexagonMCInst &MCI,
HexagonAsmPrinter &AP);
} // end namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 58b89d1..88cd3fb 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -14,12 +14,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "HexagonAsmPrinter.h"
#include "Hexagon.h"
-#include "HexagonMCInst.h"
+#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "InstPrinter/HexagonInstPrinter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -220,8 +220,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
for (unsigned Index = 0; Index < Size; Index++) {
HexagonMCInst MCI;
- MCI.setStartPacket(Index == 0);
- MCI.setEndPacket(Index == (Size-1));
+ MCI.setPacketStart(Index == 0);
+ MCI.setPacketEnd(Index == (Size-1));
HexagonLowerToMC(BundleMIs[Index], MCI, *this);
OutStreamer.EmitInstruction(MCI);
@@ -230,8 +230,8 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else {
HexagonMCInst MCI;
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
- MCI.setStartPacket(true);
- MCI.setEndPacket(true);
+ MCI.setPacketStart(true);
+ MCI.setPacketEnd(true);
}
HexagonLowerToMC(MI, MCI, *this);
OutStreamer.EmitInstruction(MCI);
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
new file mode 100644
index 0000000..240cc95
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -0,0 +1,183 @@
+//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// The loop start address in the LOOPn instruction is encoded as a distance
+// from the LOOPn instruction itself. If the start address is too far from
+// the LOOPn instruction, the loop needs to be set up manually, i.e. via
+// direct transfers to SAn and LCn.
+// This pass will identify and convert such LOOPn instructions to a proper
+// form.
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonFixupHwLoopsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {
+ initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// \brief Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// \brief Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// \brief Add the instruction to set the LC and SA registers explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
+ "Hexagon Hardware Loops Fixup", false, false)
+
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+
+/// \brief Returns true if the instruction is a hardware loop instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+
+/// \brief For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+  // Map from each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int Sub = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ unsigned Dist = Sub > 0 ? Sub : -Sub;
+ if (Dist > MAX_LOOP_DISTANCE) {
+        // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+
+/// \brief Convert a loop instruction to a sequence of instructions that
+/// set the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0)
+ .addReg(Scratch);
+}
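
fixupLoopInstrs() above estimates every instruction at 4 bytes and rewrites any LOOPn whose target block is farther than MAX_LOOP_DISTANCE away. A standalone sketch of just that decision, with made-up offsets:

#include <cassert>
#include <cstdlib>

static const unsigned MAX_LOOP_DISTANCE = 200;

static bool needsConversion(int LoopInstOffset, int TargetBlockOffset) {
  unsigned Dist = std::abs(LoopInstOffset - TargetBlockOffset);
  return Dist > MAX_LOOP_DISTANCE;
}

int main() {
  // Loop instruction estimated at byte 480, loop header at byte 120:
  // distance 360 > 200, so LC0/SA0 would be set explicitly.
  assert(needsConversion(480, 120));
  // Distance 80 <= 200: the LOOP0 form can stay.
  assert(!needsConversion(480, 400));
  return 0;
}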
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 9043cf9..d6a9329 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -327,6 +327,21 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+void HexagonFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
return MF.getFrameInfo()->getObjectOffset(FI);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index ad87f11..a62c76a 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -35,6 +35,11 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
virtual bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 2a00a9f..62aed13 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,9 +27,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hwloops"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -37,79 +35,194 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Constants.h"
#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
#include <algorithm>
+#include <vector>
using namespace llvm;
+#ifndef NDEBUG
+static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
+#endif
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+namespace llvm {
+ void initializeHexagonHardwareLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct HexagonHardwareLoops : public MachineFunctionPass {
- MachineLoopInfo *MLI;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ const HexagonTargetMachine *TM;
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+#ifndef NDEBUG
+ static int Counter;
+#endif
public:
- static char ID; // Pass identification, replacement for typeid
+ static char ID;
- HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {
+ initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Hexagon Hardware Loops"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable.
- /// Should be defined in MachineLoop. Based upon version in class Loop.
- const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
-
- /// getTripCount - Return a loop-invariant LLVM register indicating the
- /// number of times the loop will be executed. If the trip-count cannot
- /// be determined, this return null.
- CountValue *getTripCount(MachineLoop *L) const;
-
- /// isInductionOperation - Return true if the instruction matches the
- /// pattern for an opertion that defines an induction variable.
- bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+ /// Kinds of comparisons in the compare instructions.
+ struct Comparison {
+ enum Kind {
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04, // Less-than property.
+ G = 0x08, // Greater-than property.
+ U = 0x40, // Unsigned property.
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static Kind getSwappedComparison(Kind Cmp) {
+ assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
+ if ((Cmp & L) || (Cmp & G))
+ return (Kind)(Cmp ^ (L|G));
+ return Cmp;
+ }
+ };
- /// isInvalidOperation - Return true if the instruction is not valid within
- /// a hardware loop.
+ /// \brief Find the register that contains the loop controlling
+ /// induction variable.
+ /// If successful, it will return true and set the \p Reg, \p IVBump
+ /// and \p IVOp arguments. Otherwise it will return false.
+ /// The returned induction register is the register R that follows the
+ /// following induction pattern:
+ /// loop:
+ /// R = phi ..., [ R.next, LatchBlock ]
+ /// R.next = R + #bump
+ /// if (R.next < #N) goto loop
+ /// IVBump is the immediate value added to R, and IVOp is the instruction
+ /// "R.next = R + #bump".
+ bool findInductionRegister(MachineLoop *L, unsigned &Reg,
+ int64_t &IVBump, MachineInstr *&IVOp) const;
+
+ /// \brief Analyze the statements in a loop to determine if the loop
+ /// has a computable trip count and, if so, return a value that represents
+ /// the trip count expression.
+ CountValue *getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts);
+
+ /// \brief Return the expression that represents the number of times
+ /// a loop iterates. The function takes the operands that represent the
+ /// loop start value, loop end value, and induction value. Based upon
+ /// these operands, the function attempts to compute the trip count.
+ /// If the trip count is not directly available (as an immediate value,
+ /// or a register), the function will attempt to insert computation of it
+ /// to the loop's preheader.
+ CountValue *computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const;
+
+ /// \brief Return true if the instruction is not valid within a hardware
+ /// loop.
bool isInvalidLoopOperation(const MachineInstr *MI) const;
- /// containsInavlidInstruction - Return true if the loop contains an
- /// instruction that inhibits using the hardware loop.
+ /// \brief Return true if the loop contains an instruction that inhibits
+ /// using the hardware loop.
bool containsInvalidInstruction(MachineLoop *L) const;
- /// converToHardwareLoop - Given a loop, check if we can convert it to a
- /// hardware loop. If so, then perform the conversion and return true.
+ /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// If so, then perform the conversion and return true.
bool convertToHardwareLoop(MachineLoop *L);
+ /// \brief Return true if the instruction is now dead.
+ bool isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const;
+
+ /// \brief Remove the instruction if it is now dead.
+ void removeIfDead(MachineInstr *MI);
+
+ /// \brief Make sure that the "bump" instruction executes before the
+ /// compare. We need that for the IV fixup, so that the compare
+ /// instruction would not use a bumped value that has not yet been
+ /// defined. If the instructions are out of order, try to reorder them.
+ bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
+
+ /// \brief Get the instruction that loads an immediate value into \p R,
+ /// or 0 if such an instruction does not exist.
+ MachineInstr *defWithImmediate(unsigned R);
+
+ /// \brief Get the immediate value referenced to by \p MO, either for
+ /// immediate operands, or for register operands, where the register
+ /// was defined with an immediate value.
+ int64_t getImmediate(MachineOperand &MO);
+
+ /// \brief Reset the given machine operand to now refer to a new immediate
+ /// value. Assumes that the operand was already referencing an immediate
+ /// value, either directly, or via a register.
+ void setImmediate(MachineOperand &MO, int64_t Val);
+
+    /// \brief Fix the data flow of the induction variable.
+ /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
+ /// |
+ /// +-> back to phi
+ /// where "bump" is the increment of the induction variable:
+ /// iv = iv + #const.
+ /// Due to some prior code transformations, the actual flow may look
+ /// like this:
+ /// phi -+-> bump ---> back to phi
+ /// |
+ /// +-> comparison-in-latch (against upper_bound-bump),
+ /// i.e. the comparison that controls the loop execution may be using
+ /// the value of the induction variable from before the increment.
+ ///
+ /// Return true if the loop's flow is the desired one (i.e. it's
+ /// either been fixed, or no fixing was necessary).
+ /// Otherwise, return false. This can happen if the induction variable
+ /// couldn't be identified, or if the value in the latch's comparison
+ /// cannot be adjusted to reflect the post-bump value.
+ bool fixupInductionVariable(MachineLoop *L);
+
+ /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Return the block that is the preheader.
+ MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
};
char HexagonHardwareLoops::ID = 0;
+#ifndef NDEBUG
+ int HexagonHardwareLoops::Counter = 0;
+#endif
-
- // CountValue class - Abstraction for a trip count of a loop. A
- // smaller vesrsion of the MachineOperand class without the concerns
- // of changing the operand representation.
+ /// \brief Abstraction for a trip count of a loop. A smaller version
+ /// of the MachineOperand class without the concerns of changing the
+ /// operand representation.
class CountValue {
public:
enum CountValueType {
@@ -119,101 +232,62 @@ namespace {
private:
CountValueType Kind;
union Values {
- unsigned RegNum;
- int64_t ImmVal;
- Values(unsigned r) : RegNum(r) {}
- Values(int64_t i) : ImmVal(i) {}
+ struct {
+ unsigned Reg;
+ unsigned Sub;
+ } R;
+ unsigned ImmVal;
} Contents;
- bool isNegative;
public:
- CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
- isNegative(neg) {}
- explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
- isNegative(i < 0) {}
- CountValueType getType() const { return Kind; }
+ explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
+ Kind = t;
+ if (Kind == CV_Register) {
+ Contents.R.Reg = v;
+ Contents.R.Sub = u;
+ } else {
+ Contents.ImmVal = v;
+ }
+ }
bool isReg() const { return Kind == CV_Register; }
bool isImm() const { return Kind == CV_Immediate; }
- bool isNeg() const { return isNegative; }
unsigned getReg() const {
assert(isReg() && "Wrong CountValue accessor");
- return Contents.RegNum;
+ return Contents.R.Reg;
}
- void setReg(unsigned Val) {
- Contents.RegNum = Val;
+ unsigned getSubReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.R.Sub;
}
- int64_t getImm() const {
+ unsigned getImm() const {
assert(isImm() && "Wrong CountValue accessor");
- if (isNegative) {
- return -Contents.ImmVal;
- }
return Contents.ImmVal;
}
- void setImm(int64_t Val) {
- Contents.ImmVal = Val;
- }
void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
- if (isReg()) { OS << PrintReg(getReg()); }
- if (isImm()) { OS << getImm(); }
- }
- };
-
- struct HexagonFixupHwLoops : public MachineFunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
-
- HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
+ if (isImm()) { OS << Contents.ImmVal; }
}
-
- private:
- /// Maximum distance between the loop instr and the basic block.
- /// Just an estimate.
- static const unsigned MAX_LOOP_DISTANCE = 200;
-
- /// fixupLoopInstrs - Check the offset between each loop instruction and
- /// the loop basic block to determine if we can use the LOOP instruction
- /// or if we need to set the LC/SA registers explicitly.
- bool fixupLoopInstrs(MachineFunction &MF);
-
- /// convertLoopInstr - Add the instruction to set the LC and SA registers
- /// explicitly.
- void convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS);
-
};
+} // end anonymous namespace
- char HexagonFixupHwLoops::ID = 0;
-} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
+ "Hexagon Hardware Loops", false, false)
-/// isHardwareLoop - Returns true if the instruction is a hardware loop
-/// instruction.
+/// \brief Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr *MI) {
return MI->getOpcode() == Hexagon::LOOP0_r ||
MI->getOpcode() == Hexagon::LOOP0_i;
}
-/// isCompareEquals - Returns true if the instruction is a compare equals
-/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI) {
- return MI->getOpcode() == Hexagon::CMPEQri;
-}
-
-
-/// createHexagonHardwareLoops - Factory for creating
-/// the hardware loop phase.
FunctionPass *llvm::createHexagonHardwareLoops() {
return new HexagonHardwareLoops();
}
@@ -224,45 +298,149 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- // get the loop information
MLI = &getAnalysis<MachineLoopInfo>();
- // get the register information
MRI = &MF.getRegInfo();
- // the target specific instructio info.
- TII = MF.getTarget().getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TM = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
+ TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
+ TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
I != E; ++I) {
MachineLoop *L = *I;
- if (!L->getParentLoop()) {
+ if (!L->getParentLoop())
Changed |= convertToHardwareLoop(L);
- }
}
return Changed;
}
-/// getCanonicalInductionVariable - Check to see if the loop has a canonical
-/// induction variable. We check for a simple recurrence pattern - an
-/// integer recurrence that decrements by one each time through the loop and
-/// ends at zero. If so, return the phi node that corresponds to it.
-///
-/// Based upon the similar code in LoopInfo except this code is specific to
-/// the machine.
-/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+
+bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
+ unsigned &Reg,
+ int64_t &IVBump,
+ MachineInstr *&IVOp
+ ) const {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // This pair represents an induction register together with an immediate
+ // value that will be added to it in each loop iteration.
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+
+ // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
+ // from an induction operation
+ // R.next = R + bump
+ // where bump is an immediate value.
+ typedef std::map<unsigned,RegisterBump> InductionMap;
+
+ InductionMap IndMap;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction. Get the operand that corresponds to the
+ // latch block, and see if it is a result of an addition of the form "reg+imm",
+ // where the "reg" is defined by the PHI node we are looking at.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiOpReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add is the PHI we're
+ // looking at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
+ }
+ }
+ } // for (i)
+ } // for (instr)
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ unsigned CSz = Cond.size();
+ assert (CSz == 1 || CSz == 2);
+ unsigned PredR = Cond[CSz-1].getReg();
+
+ MachineInstr *PredI = MRI->getVRegDef(PredR);
+ if (!PredI->isCompare())
+ return false;
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int CmpImm = 0, CmpMask = 0;
+ bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
+ CmpMask, CmpImm);
+ // Fail if the compare was not analyzed, or it's not comparing a register
+ // with an immediate value. Not checking the mask here, since we handle
+ // the individual compare opcodes (including CMPb) later on.
+ if (!CmpAnalyzed)
+ return false;
+
+ // Exactly one of the input registers to the comparison should be among
+ // the induction registers.
+ InductionMap::iterator IndMapEnd = IndMap.end();
+ InductionMap::iterator F = IndMapEnd;
+ if (CmpReg1 != 0) {
+ InductionMap::iterator F1 = IndMap.find(CmpReg1);
+ if (F1 != IndMapEnd)
+ F = F1;
+ }
+ if (CmpReg2 != 0) {
+ InductionMap::iterator F2 = IndMap.find(CmpReg2);
+ if (F2 != IndMapEnd) {
+ if (F != IndMapEnd)
+ return false;
+ F = F2;
+ }
+ }
+ if (F == IndMapEnd)
+ return false;
+
+ Reg = F->second.first;
+ IVBump = F->second.second;
+ IVOp = MRI->getVRegDef(F->first);
+ return true;
+}
+
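
The induction bookkeeping above reduces to a map from the post-bump register to the pair (pre-bump register, bump). A minimal standalone sketch of that idea, with plain integers standing in for virtual registers and hypothetical PhiDef/AddRI records instead of the real MachineIR types:

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for the "reg = add reg, imm" updates and the PHI
    // nodes scanned in the loop header; not the actual MachineIR API.
    struct AddRI  { unsigned Def, Src; int64_t Imm; };
    struct PhiDef { unsigned Def, LatchIn; };

    // Mapping: R.next -> (R, bump), mirroring InductionMap in the pass.
    typedef std::map<unsigned, std::pair<unsigned, int64_t> > InductionMap;

    static InductionMap buildInductionMap(const std::vector<PhiDef> &Phis,
                                          const std::vector<AddRI> &Adds) {
      InductionMap IndMap;
      for (size_t p = 0; p < Phis.size(); ++p)
        for (size_t a = 0; a < Adds.size(); ++a)
          // The latch value must be an add whose source is the PHI itself.
          if (Adds[a].Def == Phis[p].LatchIn && Adds[a].Src == Phis[p].Def)
            IndMap[Adds[a].Def] = std::make_pair(Adds[a].Src, Adds[a].Imm);
      return IndMap;
    }

    int main() {
      // vreg1 = PHI [start, preheader], [vreg2, latch];  vreg2 = add vreg1, 1
      std::vector<PhiDef> Phis(1); Phis[0].Def = 1; Phis[0].LatchIn = 2;
      std::vector<AddRI>  Adds(1); Adds[0].Def = 2; Adds[0].Src = 1; Adds[0].Imm = 1;
      InductionMap M = buildInductionMap(Phis, Adds);
      assert(M.count(2) && M[2].first == 1 && M[2].second == 1);
      return 0;
    }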
+
+/// \brief Analyze the statements in a loop to determine if the loop has
+/// a computable trip count and, if so, return a value that represents
+/// the trip count expression.
///
-const MachineInstr
-*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+/// This function iterates over the phi nodes in the loop to check for
+/// induction variable patterns that are used in the calculation of
+/// the number of times the loop is executed.
+CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
+ SmallVector<MachineInstr*, 2> &OldInsts) {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
"Loop must have more than one incoming edge!");
MachineBasicBlock *Backedge = *PI++;
- if (PI == TopMBB->pred_end()) return 0; // dead loop
+ if (PI == TopMBB->pred_end()) // dead loop?
+ return 0;
MachineBasicBlock *Incoming = *PI++;
- if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+ if (PI != TopMBB->pred_end()) // multiple backedges?
+ return 0;
- // make sure there is one incoming and one backedge and determine which
+ // Make sure there is one incoming and one backedge and determine which
// is which.
if (L->contains(Incoming)) {
if (L->contains(Backedge))
@@ -271,139 +449,433 @@ const MachineInstr
} else if (!L->contains(Backedge))
return 0;
- // Loop over all of the PHI nodes, looking for a canonical induction variable:
- // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
- // - The recurrence comes from the backedge.
- // - the definition is an induction operatio.n
- for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
- I != E && I->isPHI(); ++I) {
- const MachineInstr *MPhi = &*I;
- unsigned DefReg = MPhi->getOperand(0).getReg();
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- // Check each operand for the value from the backedge.
- MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
- if (L->contains(MBB)) { // operands comes from the backedge
- // Check if the definition is an induction operation.
- const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
- if (isInductionOperation(DI, DefReg)) {
- return MPhi;
- }
- }
+ // Look for the cmp instruction to determine if we can get a useful trip
+ // count. The trip count can be either a register or an immediate. The
+ // location of the value depends upon the type (reg or imm).
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return 0;
+
+ unsigned IVReg = 0;
+ int64_t IVBump = 0;
+ MachineInstr *IVOp;
+ bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
+ if (!FoundIV)
+ return 0;
+
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+
+ MachineOperand *InitialValue = 0;
+ MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
+ for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
+ MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
+ if (MBB == Preheader)
+ InitialValue = &IV_Phi->getOperand(i);
+ else if (MBB == Latch)
+ IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
+ }
+ if (!InitialValue)
+ return 0;
+
+ SmallVector<MachineOperand,2> Cond;
+ MachineBasicBlock *TB = 0, *FB = 0;
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return 0;
+
+ MachineBasicBlock *Header = L->getHeader();
+ // TB must be non-null. If FB is also non-null, one of them must be
+ // the header. Otherwise, branch to TB could be exiting the loop, and
+ // the fall through can go to the header.
+ assert (TB && "Latch block without a branch?");
+ assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
+ if (!TB || (FB && TB != Header && FB != Header))
+ return 0;
+
+ // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
+ // to put imm(0), followed by P in the vector Cond.
+ // If TB is not the header, it means that the "not-taken" path must lead
+ // to the header.
+ bool Negated = (Cond.size() > 1) ^ (TB != Header);
+ unsigned PredReg = Cond[Cond.size()-1].getReg();
+ MachineInstr *CondI = MRI->getVRegDef(PredReg);
+ unsigned CondOpc = CondI->getOpcode();
+
+ unsigned CmpReg1 = 0, CmpReg2 = 0;
+ int Mask = 0, ImmValue = 0;
+ bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
+ Mask, ImmValue);
+ if (!AnalyzedCmp)
+ return 0;
+
+ // The comparison operator type determines how we compute the loop
+ // trip count.
+ OldInsts.push_back(CondI);
+ OldInsts.push_back(IVOp);
+
+ // Sadly, the following code gets information based on the position
+ // of the operands in the compare instruction. This has to be done
+ // this way, because the comparisons check for a specific relationship
+ // between the operands (e.g. is-less-than), rather than to find out
+ // what relationship the operands are in (as on PPC).
+ Comparison::Kind Cmp;
+ bool isSwapped = false;
+ const MachineOperand &Op1 = CondI->getOperand(1);
+ const MachineOperand &Op2 = CondI->getOperand(2);
+ const MachineOperand *EndValue = 0;
+
+ if (Op1.isReg()) {
+ if (Op2.isImm() || Op1.getReg() == IVReg)
+ EndValue = &Op2;
+ else {
+ EndValue = &Op1;
+ isSwapped = true;
}
}
- return 0;
-}
-/// getTripCount - Return a loop-invariant LLVM value indicating the
-/// number of times the loop will be executed. The trip count can
-/// be either a register or a constant value. If the trip-count
-/// cannot be determined, this returns null.
-///
-/// We find the trip count from the phi instruction that defines the
-/// induction variable. We follow the links to the CMP instruction
-/// to get the trip count.
-///
-/// Based upon getTripCount in LoopInfo.
-///
-CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
- // Check that the loop has a induction variable.
- const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
- if (IV_Inst == 0) return 0;
-
- // Canonical loops will end with a 'cmpeq_ri IV, Imm',
- // if Imm is 0, get the count from the PHI opnd
- // if Imm is -M, than M is the count
- // Otherwise, Imm is the count
- const MachineOperand *IV_Opnd;
- const MachineOperand *InitialValue;
- if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
- InitialValue = &IV_Inst->getOperand(1);
- IV_Opnd = &IV_Inst->getOperand(3);
- } else {
- InitialValue = &IV_Inst->getOperand(3);
- IV_Opnd = &IV_Inst->getOperand(1);
- }
-
- // Look for the cmp instruction to determine if we
- // can get a useful trip count. The trip count can
- // be either a register or an immediate. The location
- // of the value depends upon the type (reg or imm).
- for (MachineRegisterInfo::reg_iterator
- RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
- RI != RE; ++RI) {
- IV_Opnd = &RI.getOperand();
- const MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI)) {
- const MachineOperand &MO = MI->getOperand(2);
- assert(MO.isImm() && "IV Cmp Operand should be 0");
- int64_t ImmVal = MO.getImm();
-
- const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
- assert(L->contains(IV_DefInstr->getParent()) &&
- "IV definition should occurs in loop");
- int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
-
- if (ImmVal == 0) {
- // Make sure the induction variable changes by one on each iteration.
- if (iv_value != 1 && iv_value != -1) {
+ if (!EndValue)
+ return 0;
+
+ switch (CondOpc) {
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
+ case Hexagon::CMPLTrr:
+ Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
+ break;
+ case Hexagon::CMPLTUrr:
+ Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
+ break;
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
+ break;
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
+ break;
+ // Very limited support for byte/halfword compares.
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPhEQri_V4: {
+ if (IVBump != 1)
+ return 0;
+
+ int64_t InitV, EndV;
+ // Since the comparisons are "ri", the EndValue should be an
+ // immediate. Check it just in case.
+ assert(EndValue->isImm() && "Unrecognized latch comparison");
+ EndV = EndValue->getImm();
+ // Allow InitialValue to be a register defined with an immediate.
+ if (InitialValue->isReg()) {
+ if (!defWithImmediate(InitialValue->getReg()))
return 0;
- }
- return new CountValue(InitialValue->getReg(), iv_value > 0);
+ InitV = getImmediate(*InitialValue);
} else {
- assert(InitialValue->isReg() && "Expecting register for init value");
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
- if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
- int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
- if ((count % iv_value) != 0) {
- return 0;
- }
- return new CountValue(count/iv_value);
- }
+ assert(InitialValue->isImm());
+ InitV = InitialValue->getImm();
+ }
+ if (InitV >= EndV)
+ return 0;
+ if (CondOpc == Hexagon::CMPbEQri_V4) {
+ if (!isInt<8>(InitV) || !isInt<8>(EndV))
+ return 0;
+ } else { // Hexagon::CMPhEQri_V4
+ if (!isInt<16>(InitV) || !isInt<16>(EndV))
+ return 0;
}
+ Cmp = !Negated ? Comparison::EQ : Comparison::NE;
+ break;
}
+ default:
+ return 0;
}
- return 0;
+
+ if (isSwapped)
+ Cmp = Comparison::getSwappedComparison(Cmp);
+
+ if (InitialValue->isReg()) {
+ unsigned R = InitialValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ OldInsts.push_back(MRI->getVRegDef(R));
+ }
+ if (EndValue->isReg()) {
+ unsigned R = EndValue->getReg();
+ MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
+ if (!MDT->properlyDominates(DefBB, Header))
+ return 0;
+ }
+
+ return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
}
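
The Comparison kinds used here appear to be flag-encoded (computeCount below tests Cmp against Comparison::L, G and EQ); under that assumption, swapping the operand order simply exchanges the "less" and "greater" flags while preserving signedness and the "or equal" bit. A schematic, standalone sketch (the real enum's names and values may differ):

    #include <cassert>

    namespace Cmp {
      // Inferred flag encoding: EQ/NE plus less/greater and an unsigned bit.
      enum Kind {
        EQ = 0x01, NE = 0x02, L = 0x04, G = 0x08, U = 0x40,
        LTs = L, LEs = L | EQ, GTs = G, GEs = G | EQ,
        LTu = L | U, LEu = L | EQ | U, GTu = G | U, GEu = G | EQ | U
      };

      // "a < b" with swapped operands is "b > a": flip the L and G bits,
      // keep signedness and the "or equal" bit; EQ and NE are symmetric.
      inline Kind getSwapped(Kind K) {
        unsigned LG = K & (L | G);
        if (LG == L || LG == G)
          return static_cast<Kind>(K ^ (L | G));
        return K;
      }
    }

    int main() {
      assert(Cmp::getSwapped(Cmp::LTs) == Cmp::GTs);
      assert(Cmp::getSwapped(Cmp::GEu) == Cmp::LEu);
      assert(Cmp::getSwapped(Cmp::EQ)  == Cmp::EQ);
      return 0;
    }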
-/// isInductionOperation - return true if the operation is matches the
-/// pattern that defines an induction variable:
-/// add iv, c
-///
-bool
-HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
- unsigned IVReg) const {
- return (MI->getOpcode() ==
- Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+/// \brief Helper function that returns the expression that represents the
+/// number of times a loop iterates. The function takes the operands that
+/// represent the loop start value, loop end value, and induction value.
+/// Based upon these operands, the function attempts to compute the trip count.
+CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
+ const MachineOperand *Start,
+ const MachineOperand *End,
+ unsigned IVReg,
+ int64_t IVBump,
+ Comparison::Kind Cmp) const {
+ // Cannot handle comparison EQ, i.e. while (A == B).
+ if (Cmp == Comparison::EQ)
+ return 0;
+
+ // Check if either the start or end values are an assignment of an immediate.
+ // If so, use the immediate value rather than the register.
+ if (Start->isReg()) {
+ const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
+ if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
+ Start = &StartValInstr->getOperand(1);
+ }
+ if (End->isReg()) {
+ const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
+ if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
+ End = &EndValInstr->getOperand(1);
+ }
+
+ assert (Start->isReg() || Start->isImm());
+ assert (End->isReg() || End->isImm());
+
+ bool CmpLess = Cmp & Comparison::L;
+ bool CmpGreater = Cmp & Comparison::G;
+ bool CmpHasEqual = Cmp & Comparison::EQ;
+
+ // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
+ // If loop executes while iv is "less" with the iv value going down, then
+ // the iv must wrap.
+ if (CmpLess && IVBump < 0)
+ return 0;
+ // If loop executes while iv is "greater" with the iv value going up, then
+ // the iv must wrap.
+ if (CmpGreater && IVBump > 0)
+ return 0;
+
+ if (Start->isImm() && End->isImm()) {
+ // Both, start and end are immediates.
+ int64_t StartV = Start->getImm();
+ int64_t EndV = End->getImm();
+ int64_t Dist = EndV - StartV;
+ if (Dist == 0)
+ return 0;
+
+ bool Exact = (Dist % IVBump) == 0;
+
+ if (Cmp == Comparison::NE) {
+ if (!Exact)
+ return 0;
+ if ((Dist < 0) ^ (IVBump < 0))
+ return 0;
+ }
+
+ // For comparisons that include the final value (i.e. include equality
+ // with the final value), we need to increase the distance by 1.
+ if (CmpHasEqual)
+ Dist = Dist > 0 ? Dist+1 : Dist-1;
+
+ // assert (CmpLess => Dist > 0);
+ assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
+ // assert (CmpGreater => Dist < 0);
+ assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
+
+ // "Normalized" distance, i.e. with the bump set to +-1.
+ int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump-1)) / IVBump
+ : (-Dist + (-IVBump-1)) / (-IVBump);
+ assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
+
+ uint64_t Count = Dist1;
+
+ if (Count > 0xFFFFFFFFULL)
+ return 0;
+
+ return new CountValue(CountValue::CV_Immediate, Count);
+ }
+
+ // A general case: Start and End are some values, but the actual
+ // iteration count may not be available. If it is not, insert
+ // a computation of it into the preheader.
+
+ // If the induction variable bump is not a power of 2, quit.
+ // Otherwise we'd need a general integer division.
+ if (!isPowerOf2_64(abs(IVBump)))
+ return 0;
+
+ MachineBasicBlock *PH = Loop->getLoopPreheader();
+ assert (PH && "Should have a preheader by now");
+ MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
+ DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
+ : DebugLoc();
+
+ // If Start is an immediate and End is a register, the trip count
+ // will be "reg - imm". Hexagon's "subtract immediate" instruction
+ // is actually "reg + -imm".
+
+ // If the loop IV is going downwards, i.e. if the bump is negative,
+ // then the iteration count (computed as End-Start) will need to be
+ // negated. To avoid the negation, just swap Start and End.
+ if (IVBump < 0) {
+ std::swap(Start, End);
+ IVBump = -IVBump;
+ }
+ // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
+ // Signedness, and "including equality" are preserved.
+
+ bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
+ bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
+
+ int64_t StartV = 0, EndV = 0;
+ if (Start->isImm())
+ StartV = Start->getImm();
+ if (End->isImm())
+ EndV = End->getImm();
+
+ int64_t AdjV = 0;
+ // To compute the iteration count, we would need this computation:
+ // Count = (End - Start + (IVBump-1)) / IVBump
+ // or, when CmpHasEqual:
+ // Count = (End - Start + (IVBump-1)+1) / IVBump
+ // The "IVBump-1" part is the adjustment (AdjV). We can avoid
+ // generating an instruction specifically to add it if we can adjust
+ // the immediate values for Start or End.
+
+ if (CmpHasEqual) {
+ // Need to add 1 to the total iteration count.
+ if (Start->isImm())
+ StartV--;
+ else if (End->isImm())
+ EndV++;
+ else
+ AdjV += 1;
+ }
+
+ if (Cmp != Comparison::NE) {
+ if (Start->isImm())
+ StartV -= (IVBump-1);
+ else if (End->isImm())
+ EndV += (IVBump-1);
+ else
+ AdjV += (IVBump-1);
+ }
+
+ unsigned R = 0, SR = 0;
+ if (Start->isReg()) {
+ R = Start->getReg();
+ SR = Start->getSubReg();
+ } else {
+ R = End->getReg();
+ SR = End->getSubReg();
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ // Hardware loops cannot handle 64-bit registers. If it's a double
+ // register, it has to have a subregister.
+ if (!SR && RC == &Hexagon::DoubleRegsRegClass)
+ return 0;
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+
+ // Compute DistR (register with the distance between Start and End).
+ unsigned DistR, DistSR;
+
+ // Special case: if the start value is an imm(0), the distance is just End.
+ if (Start->isImm() && StartV == 0) {
+ DistR = End->getReg();
+ DistSR = End->getSubReg();
+ } else {
+ const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
+ (RegToImm ? TII->get(Hexagon::SUB_ri) :
+ TII->get(Hexagon::ADD_ri));
+ unsigned SubR = MRI->createVirtualRegister(IntRC);
+ MachineInstrBuilder SubIB =
+ BuildMI(*PH, InsertPos, DL, SubD, SubR);
+
+ if (RegToReg) {
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else if (RegToImm) {
+ SubIB.addImm(EndV)
+ .addReg(Start->getReg(), 0, Start->getSubReg());
+ } else { // ImmToReg
+ SubIB.addReg(End->getReg(), 0, End->getSubReg())
+ .addImm(-StartV);
+ }
+ DistR = SubR;
+ DistSR = 0;
+ }
+
+ // From DistR, compute AdjR (register with the adjusted distance).
+ unsigned AdjR, AdjSR;
+
+ if (AdjV == 0) {
+ AdjR = DistR;
+ AdjSR = DistSR;
+ } else {
+ // Generate AdjR = ADD DistR, #AdjV
+ unsigned AddR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
+ BuildMI(*PH, InsertPos, DL, AddD, AddR)
+ .addReg(DistR, 0, DistSR)
+ .addImm(AdjV);
+
+ AdjR = AddR;
+ AdjSR = 0;
+ }
+
+ // From AdjR, compute CountR (register with the final count).
+ unsigned CountR, CountSR;
+
+ if (IVBump == 1) {
+ CountR = AdjR;
+ CountSR = AdjSR;
+ } else {
+ // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
+ unsigned Shift = Log2_32(IVBump);
+
+ // Generate CountR = LSR AdjR, #Shift.
+ unsigned LsrR = MRI->createVirtualRegister(IntRC);
+ const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
+ BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
+ .addReg(AdjR, 0, AdjSR)
+ .addImm(Shift);
+
+ CountR = LsrR;
+ CountSR = 0;
+ }
+
+ return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
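
The immediate/immediate path above is just arithmetic: round the distance up to a whole number of bumps, add one iteration for inclusive comparisons, and reject inexact "!=" loops. A minimal standalone sketch of that computation (assuming wrap-around has already been excluded, as the checks above do):

    #include <cassert>
    #include <cstdint>

    // Trip count of "for (iv = Start; iv <cmp> End; iv += Bump)" when both
    // bounds are known immediates; returns 0 when no count can be computed.
    static uint64_t tripCount(int64_t Start, int64_t End, int64_t Bump,
                              bool Inclusive /* <= or >= */, bool IsNE) {
      int64_t Dist = End - Start;
      if (Dist == 0)
        return 0;
      // A != comparison only terminates if the bump divides the distance
      // exactly and moves the IV towards the bound.
      if (IsNE && ((Dist % Bump) != 0 || ((Dist < 0) ^ (Bump < 0))))
        return 0;
      // Comparisons that include the final value run for one extra iteration.
      if (Inclusive)
        Dist += (Dist > 0) ? 1 : -1;
      // "Normalized" distance: round up to a whole number of bumps.
      int64_t Count = (Bump > 0) ? (Dist + (Bump - 1)) / Bump
                                 : (-Dist + (-Bump - 1)) / (-Bump);
      return (Count > 0 && Count <= 0xFFFFFFFFLL) ? (uint64_t)Count : 0;
    }

    int main() {
      assert(tripCount(0, 10, 1, false, false) == 10);  // i < 10,  i += 1
      assert(tripCount(0, 10, 1, true,  false) == 11);  // i <= 10, i += 1
      assert(tripCount(0, 10, 3, false, false) == 4);   // i < 10,  i += 3
      assert(tripCount(10, 0, -2, false, false) == 5);  // i > 0,   i -= 2
      assert(tripCount(0, 10, 3, false, true)  == 0);   // i != 10, i += 3: no exit
      return 0;
    }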
-/// isInvalidOperation - Return true if the operation is invalid within
-/// hardware loop.
-bool
-HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+/// \brief Return true if the operation is invalid within a hardware loop.
+bool HexagonHardwareLoops::isInvalidLoopOperation(
+ const MachineInstr *MI) const {
// call is not allowed because the callee may use a hardware loop
- if (MI->getDesc().isCall()) {
+ if (MI->getDesc().isCall())
return true;
- }
+
// do not allow nested hardware loops
- if (isHardwareLoop(MI)) {
+ if (isHardwareLoop(MI))
return true;
- }
+
// check if the instruction defines a hardware loop register
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() &&
- (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
- MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA0)) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
+ R == Hexagon::SA0 || R == Hexagon::SA1)
return true;
- }
}
return false;
}
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the hardware loop function.
-///
+
+/// \brief Return true if the loop contains an instruction that inhibits
+/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -411,58 +883,184 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI)) {
+ if (isInvalidLoopOperation(MI))
return true;
- }
}
}
return false;
}
-/// converToHardwareLoop - check if the loop is a candidate for
-/// converting to a hardware loop. If so, then perform the
-/// transformation.
+
+/// \brief Returns true if the instruction is dead. This was essentially
+/// copied from DeadMachineInstructionElim::isDead, but with special cases
+/// for inline asm, physical registers and instructions with side effects
+/// removed.
+bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
+ SmallVector<MachineInstr*, 1> &DeadPhis) const {
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (MRI->use_nodbg_empty(Reg))
+ continue;
+
+ typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
+
+ // This instruction has users, but if the only user is the phi node for the
+ // parent block, and the only use of that phi node is this instruction, then
+ // this instruction is dead: both it (and the phi node) can be removed.
+ use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
+ use_nodbg_iterator End = MRI->use_nodbg_end();
+ if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
+ return false;
+
+ MachineInstr *OnePhi = I.getOperand().getParent();
+ for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+ const MachineOperand &OPO = OnePhi->getOperand(j);
+ if (!OPO.isReg() || !OPO.isDef())
+ continue;
+
+ unsigned OPReg = OPO.getReg();
+ use_nodbg_iterator nextJ;
+ for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
+ J != End; J = nextJ) {
+ nextJ = llvm::next(J);
+ MachineOperand &Use = J.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+
+ // If the phi node has a user that is not MI, bail...
+ if (MI != UseMI)
+ return false;
+ }
+ }
+ DeadPhis.push_back(OnePhi);
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
+ // This procedure was essentially copied from DeadMachineInstructionElim.
+
+ SmallVector<MachineInstr*, 1> DeadPhis;
+ if (isDead(MI, DeadPhis)) {
+ DEBUG(dbgs() << "HW looping will remove: " << *MI);
+
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. Examine each def operand for such references;
+ // if found, mark the DBG_VALUE as undef (but don't delete it).
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ MachineRegisterInfo::use_iterator nextI;
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+ E = MRI->use_end(); I != E; I = nextI) {
+ nextI = llvm::next(I); // I is invalidated by the setReg
+ MachineOperand &Use = I.getOperand();
+ MachineInstr *UseMI = Use.getParent();
+ if (UseMI == MI)
+ continue;
+ if (Use.isDebug())
+ UseMI->getOperand(0).setReg(0U);
+ // This may also be an "instr -> phi -> instr" case which can
+ // be removed too.
+ }
+ }
+
+ MI->eraseFromParent();
+ for (unsigned i = 0; i < DeadPhis.size(); ++i)
+ DeadPhis[i]->eraseFromParent();
+ }
+}
+
+/// \brief Check if the loop is a candidate for converting to a hardware
+/// loop. If so, then perform the transformation.
///
-/// This function works on innermost loops first. A loop can
-/// be converted if it is a counting loop; either a register
-/// value or an immediate.
+/// This function works on innermost loops first. A loop can be converted
+/// if it is a counting loop; either a register value or an immediate.
///
-/// The code makes several assumptions about the representation
-/// of the loop in llvm.
+/// The code makes several assumptions about the representation of the loop
+/// in llvm.
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ // This is just for sanity.
+ assert(L->getHeader() && "Loop without a header?");
+
bool Changed = false;
// Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
Changed |= convertToHardwareLoop(*I);
- }
+
// If a nested loop has been converted, then we can't convert this loop.
- if (Changed) {
+ if (Changed)
return Changed;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = HWLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= HWLoopLimit)
+ return false;
+ Counter++;
}
- // Are we able to determine the trip count for the loop?
- CountValue *TripCount = getTripCount(L);
- if (TripCount == 0) {
- return false;
- }
+#endif
+
// Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L)) {
+ if (containsInvalidInstruction(L))
return false;
- }
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- // No preheader means there's not place for the loop instr.
- if (Preheader == 0) {
+
+ // Is the induction variable bump feeding the latch condition?
+ if (!fixupInductionVariable(L))
return false;
- }
- MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
MachineBasicBlock *LastMBB = L->getExitingBlock();
// Don't generate hw loop if the loop has more than one exit.
- if (LastMBB == 0) {
+ if (LastMBB == 0)
return false;
- }
+
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
- if (LastI == LastMBB->end()) {
+ if (LastI == LastMBB->end())
return false;
+
+ // Ensure the loop has a preheader: the loop instruction will be
+ // placed there.
+ bool NewPreheader = false;
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = createPreheaderForLoop(L);
+ if (!Preheader)
+ return false;
+ NewPreheader = true;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ SmallVector<MachineInstr*, 2> OldInsts;
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getLoopTripCount(L, OldInsts);
+ if (TripCount == 0)
+ return false;
+
+ // Is the trip count available in the preheader?
+ if (TripCount->isReg()) {
+ // There will be a use of the register inserted into the preheader,
+ // so make sure that the register is actually defined at that point.
+ MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
+ MachineBasicBlock *BBDef = TCDef->getParent();
+ if (!NewPreheader) {
+ if (!MDT->dominates(BBDef, Preheader))
+ return false;
+ } else {
+ // If we have just created a preheader, the dominator tree won't be
+ // aware of it. Check if the definition of the register dominates
+ // the header, but is not the header itself.
+ if (!MDT->properlyDominates(BBDef, L->getHeader()))
+ return false;
+ }
}
// Determine the loop start.
@@ -470,53 +1068,53 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
if (L->getLoopLatch() != LastMBB) {
// When the exit and latch are not the same, use the latch block as the
// start.
- // The loop start address is used only after the 1st iteration, and the loop
- // latch may contains instrs. that need to be executed after the 1st iter.
+ // The loop start address is used only after the 1st iteration, and the
+ // loop latch may contain instructions that need to be executed after the
+ // first iteration.
LoopStart = L->getLoopLatch();
// Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart)) {
+ if (!LastMBB->isSuccessor(LoopStart))
return false;
- }
}
- // Convert the loop to a hardware loop
+ // Convert the loop to a hardware loop.
DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
- DebugLoc InsertPosDL;
+ DebugLoc DL;
if (InsertPos != Preheader->end())
- InsertPosDL = InsertPos->getDebugLoc();
+ DL = InsertPos->getDebugLoc();
if (TripCount->isReg()) {
// Create a copy of the loop count register.
- MachineFunction *MF = LastMBB->getParent();
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(TripCount->getReg());
- unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPosDL,
- TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
- if (TripCount->isNeg()) {
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, InsertPosDL,
- TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
- }
-
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
+ .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
// Add the Loop instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, InsertPosDL,
- TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart)
+ .addReg(CountReg);
} else {
- assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
- // Add the Loop immediate instruction to the beginning of the loop.
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop,
+ // if the immediate fits in the instructions. Otherwise, we need to
+ // create a new virtual register.
int64_t CountImm = TripCount->getImm();
- BuildMI(*Preheader, InsertPos, InsertPosDL,
- TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
+ unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
+ .addImm(CountImm);
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
+ .addMBB(LoopStart).addReg(CountReg);
+ } else
+ BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
+ .addMBB(LoopStart).addImm(CountImm);
}
- // Make sure the loop start always has a reference in the CFG. We need to
- // create a BlockAddress operand to get this mechanism to work both the
+ // Make sure the loop start always has a reference in the CFG. We need
+ // to create a BlockAddress operand to get this mechanism to work; both the
// MachineBasicBlock and BasicBlock objects need the flag set.
LoopStart->setHasAddressTaken();
// This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object
+ // object.
BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
// Replace the loop branch with an endloop instruction.
@@ -529,13 +1127,12 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// - a conditional branch to the loop start.
if (LastI->getOpcode() == Hexagon::JMP_c ||
LastI->getOpcode() == Hexagon::JMP_cNot) {
- // delete one and change/add an uncond. branch to out of the loop
+ // Delete one and change/add an unconditional branch out of the loop.
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
LastI = LastMBB->erase(LastI);
if (!L->contains(BranchTarget)) {
- if (LastI != LastMBB->end()) {
- TII->RemoveBranch(*LastMBB);
- }
+ if (LastI != LastMBB->end())
+ LastI = LastMBB->erase(LastI);
SmallVector<MachineOperand, 0> Cond;
TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
}
@@ -545,110 +1142,414 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
}
delete TripCount;
+ // The induction operation and the comparison may now be
+ // unneeded. If so, remove them.
+ for (unsigned i = 0; i < OldInsts.size(); ++i)
+ removeIfDead(OldInsts[i]);
+
++NumHWLoops;
return true;
}
-/// createHexagonFixupHwLoops - Factory for creating the hardware loop
-/// phase.
-FunctionPass *llvm::createHexagonFixupHwLoops() {
- return new HexagonFixupHwLoops();
+
+bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
+ MachineInstr *CmpI) {
+ assert (BumpI != CmpI && "Bump and compare in the same instruction?");
+
+ MachineBasicBlock *BB = BumpI->getParent();
+ if (CmpI->getParent() != BB)
+ return false;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ // Check if things are in order to begin with.
+ for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ if (&*I == CmpI)
+ return true;
+
+ // Out of order.
+ unsigned PredR = CmpI->getOperand(0).getReg();
+ bool FoundBump = false;
+ instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
+ for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
+ MachineInstr *In = &*I;
+ for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = In->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ if (MO.getReg() == PredR) // Found an intervening use of PredR.
+ return false;
+ }
+ }
+
+ if (In == BumpI) {
+ instr_iterator After = BumpI;
+ instr_iterator From = CmpI;
+ BB->splice(llvm::next(After), BB, From);
+ FoundBump = true;
+ break;
+ }
+ }
+ assert (FoundBump && "Cannot determine instruction order");
+ return FoundBump;
}
-bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
- bool Changed = fixupLoopInstrs(MF);
- return Changed;
+MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
+ MachineInstr *DI = MRI->getVRegDef(R);
+ unsigned DOpc = DI->getOpcode();
+ switch (DOpc) {
+ case Hexagon::TFRI:
+ case Hexagon::TFRI64:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return DI;
+ }
+ return 0;
}
-/// fixupLoopInsts - For Hexagon, if the loop label is to far from the
-/// loop instruction then we need to set the LC0 and SA0 registers
-/// explicitly instead of using LOOP(start,count). This function
-/// checks the distance, and generates register assignments if needed.
-///
-/// This function makes two passes over the basic blocks. The first
-/// pass computes the offset of the basic block from the start.
-/// The second pass checks all the loop instructions.
-bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
-
- // Offset of the current instruction from the start.
- unsigned InstOffset = 0;
- // Map for each basic block to it's first instruction.
- DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
-
- // First pass - compute the offset of each basic block.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- BlockToInstOffset[MBB] = InstOffset;
- InstOffset += (MBB->size() * 4);
- }
-
- // Second pass - check each loop instruction to see if it needs to
- // be converted.
- InstOffset = 0;
- bool Changed = false;
- RegScavenger RS;
-
- // Loop over all the basic blocks.
- for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
- MBB != MBBe; ++MBB) {
- InstOffset = BlockToInstOffset[MBB];
- RS.enterBasicBlock(MBB);
-
- // Loop over all the instructions.
- MachineBasicBlock::iterator MIE = MBB->end();
- MachineBasicBlock::iterator MII = MBB->begin();
- while (MII != MIE) {
- if (isHardwareLoop(MII)) {
- RS.forward(MII);
- assert(MII->getOperand(0).isMBB() &&
- "Expect a basic block as loop operand");
- int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- diff = (diff > 0 ? diff : -diff);
- if ((unsigned)diff > MAX_LOOP_DISTANCE) {
- // Convert to explicity setting LC0 and SA0.
- convertLoopInstr(MF, MII, RS);
- MII = MBB->erase(MII);
- Changed = true;
- } else {
- ++MII;
+
+int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ assert(DI && "Need an immediate operand");
+ // All currently supported "define-with-immediate" instructions have the
+ // actual immediate value in the operand(1).
+ int64_t v = DI->getOperand(1).getImm();
+ return v;
+}
+
+
+void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
+ if (MO.isImm()) {
+ MO.setImm(Val);
+ return;
+ }
+
+ assert(MO.isReg());
+ unsigned R = MO.getReg();
+ MachineInstr *DI = defWithImmediate(R);
+ if (MRI->hasOneNonDBGUse(R)) {
+ // If R has only one use, then just change its defining instruction to
+ // the new immediate value.
+ DI->getOperand(1).setImm(Val);
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ MachineBasicBlock &B = *DI->getParent();
+ DebugLoc DL = DI->getDebugLoc();
+ BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
+ .addImm(Val);
+ MO.setReg(NewR);
+}
+
+
+bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+
+ if (!Header || !Preheader || !Latch)
+ return false;
+
+ // These data structures follow the same concept as the corresponding
+ // ones in findInductionRegister (see the comments there).
+ typedef std::pair<unsigned,int64_t> RegisterBump;
+ typedef std::pair<unsigned,RegisterBump> RegisterInduction;
+ typedef std::set<RegisterInduction> RegisterInductionSet;
+
+ // Register candidates for induction variables, with their associated bumps.
+ RegisterInductionSet IndRegs;
+
+ // Look for induction patterns:
+ // vreg1 = PHI ..., [ latch, vreg2 ]
+ // vreg2 = ADD vreg1, imm
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *Phi = &*I;
+
+ // Have a PHI instruction.
+ for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
+ if (Phi->getOperand(i+1).getMBB() != Latch)
+ continue;
+
+ unsigned PhiReg = Phi->getOperand(i).getReg();
+ MachineInstr *DI = MRI->getVRegDef(PhiReg);
+ unsigned UpdOpc = DI->getOpcode();
+ bool isAdd = (UpdOpc == Hexagon::ADD_ri);
+
+ if (isAdd) {
+ // If the register operand to the add/sub is the PHI we are looking
+ // at, this meets the induction pattern.
+ unsigned IndReg = DI->getOperand(1).getReg();
+ if (MRI->getVRegDef(IndReg) == Phi) {
+ unsigned UpdReg = DI->getOperand(0).getReg();
+ int64_t V = DI->getOperand(2).getImm();
+ IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
}
- } else {
- ++MII;
}
- InstOffset += 4;
+ } // for (i)
+ } // for (instr)
+
+ if (IndRegs.empty())
+ return false;
+
+ MachineBasicBlock *TB = 0, *FB = 0;
+ SmallVector<MachineOperand,2> Cond;
+ // AnalyzeBranch returns true if it fails to analyze branch.
+ bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
+ if (NotAnalyzed)
+ return false;
+
+ // Check if the latch branch is unconditional.
+ if (Cond.empty())
+ return false;
+
+ if (TB != Header && FB != Header)
+ // The latch does not go back to the header. Not a latch we know and love.
+ return false;
+
+ // Expecting a predicate register as a condition. It won't be a hardware
+ // predicate register at this point yet, just a vreg.
+ // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
+ // into Cond, followed by the predicate register. For non-negated branches
+ // it's just the register.
+ unsigned CSz = Cond.size();
+ if (CSz != 1 && CSz != 2)
+ return false;
+
+ unsigned P = Cond[CSz-1].getReg();
+ MachineInstr *PredDef = MRI->getVRegDef(P);
+
+ if (!PredDef->isCompare())
+ return false;
+
+ SmallSet<unsigned,2> CmpRegs;
+ MachineOperand *CmpImmOp = 0;
+
+ // Go over all operands to the compare and look for immediate and register
+ // operands. Assume that if the compare has a single register use and a
+ // single immediate operand, then the register is being compared with the
+ // immediate value.
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg()) {
+ // Skip all implicit references. In one case there was:
+ // %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
+ if (MO.isImplicit())
+ continue;
+ if (MO.isUse()) {
+ unsigned R = MO.getReg();
+ if (!defWithImmediate(R)) {
+ CmpRegs.insert(MO.getReg());
+ continue;
+ }
+ // Consider the register to be the "immediate" operand.
+ if (CmpImmOp)
+ return false;
+ CmpImmOp = &MO;
+ }
+ } else if (MO.isImm()) {
+ if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
+ return false;
+ CmpImmOp = &MO;
}
}
- return Changed;
+ if (CmpRegs.empty())
+ return false;
+
+ // Check if the compared register follows the order we want. Fix if needed.
+ for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
+ I != E; ++I) {
+ // This is a success. If the register used in the comparison is one that
+ // we have identified as a bumped (updated) induction register, there is
+ // nothing to do.
+ if (CmpRegs.count(I->first))
+ return true;
+
+ // Otherwise, if the register being compared comes out of a PHI node,
+ // and has been recognized as following the induction pattern, and is
+ // compared against an immediate, we can fix it.
+ const RegisterBump &RB = I->second;
+ if (CmpRegs.count(RB.first)) {
+ if (!CmpImmOp)
+ return false;
+
+ int64_t CmpImm = getImmediate(*CmpImmOp);
+ int64_t V = RB.second;
+ if (V > 0 && CmpImm+V < CmpImm) // Overflow (64-bit).
+ return false;
+ if (V < 0 && CmpImm+V > CmpImm) // Overflow (64-bit).
+ return false;
+ CmpImm += V;
+ // Some forms of cmp-immediate allow u9 and s10. Assume the worst case
+ // scenario, i.e. an 8-bit value.
+ if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
+ return false;
+
+ // Make sure that the compare happens after the bump. Otherwise,
+ // after the fixup, the compare would use a yet-undefined register.
+ MachineInstr *BumpI = MRI->getVRegDef(I->first);
+ bool Order = orderBumpCompare(BumpI, PredDef);
+ if (!Order)
+ return false;
+
+ // Finally, fix the compare instruction.
+ setImmediate(*CmpImmOp, CmpImm);
+ for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = PredDef->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RB.first) {
+ MO.setReg(I->first);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
}
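
The immediate adjustment performed above relies on a simple identity: testing the pre-bump induction value against C is equivalent to testing the post-bump value against C plus the bump (absent overflow, which the code checks for). A small standalone illustration with an ordinary counted loop rather than the pass's MachineIR:

    #include <cassert>

    int main() {
      int CountA = 0, CountB = 0;

      // Loop controlled by the pre-bump value: "continue while i != 9".
      for (int i = 0; ; ) {
        ++CountA;
        int next = i + 1;          // the "bump": iv = iv + #1
        if (i == 9) break;         // compare uses the value before the bump
        i = next;
      }

      // Equivalent loop after the fixup: compare the bumped value against 9+1.
      for (int i = 0; ; ) {
        ++CountB;
        int next = i + 1;
        if (next == 10) break;     // immediate adjusted by the bump (9 + 1)
        i = next;
      }

      assert(CountA == CountB && CountA == 10);
      return 0;
    }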
-/// convertLoopInstr - convert a loop instruction to a sequence of instructions
-/// that set the lc and sa register explicitly.
-void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
- MachineBasicBlock::iterator &MII,
- RegScavenger &RS) {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
- MachineBasicBlock *MBB = MII->getParent();
- DebugLoc DL = MII->getDebugLoc();
- unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0);
-
- // First, set the LC0 with the trip count.
- if (MII->getOperand(1).isReg()) {
- // Trip count is a register
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(MII->getOperand(1).getReg());
+
+/// \brief Create a preheader for a given loop.
+MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
+ MachineLoop *L) {
+ if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ return TmpPH;
+
+ MachineBasicBlock *Header = L->getHeader();
+ MachineBasicBlock *Latch = L->getLoopLatch();
+ MachineFunction *MF = Header->getParent();
+ DebugLoc DL;
+
+ if (!Latch || Header->hasAddressTaken())
+ return 0;
+
+ typedef MachineBasicBlock::instr_iterator instr_iterator;
+ typedef MachineBasicBlock::pred_iterator pred_iterator;
+
+ // Verify that all existing predecessors have analyzable branches
+ // (or no branches at all).
+ typedef std::vector<MachineBasicBlock*> MBBVector;
+ MBBVector Preds(Header->pred_begin(), Header->pred_end());
+ SmallVector<MachineOperand,2> Tmp1;
+ MachineBasicBlock *TB = 0, *FB = 0;
+
+ if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
+ return 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
+ if (NotAnalyzed)
+ return 0;
+ }
+ }
+
+ MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
+ MF->insert(Header, NewPH);
+
+ if (Header->pred_size() > 2) {
+ // Ensure that the header has only two predecessors: the preheader and
+ // the loop latch. Any additional predecessors of the header should
+ // join at the newly created preheader. Inspect all PHI nodes from the
+ // header and create appropriate corresponding PHI nodes in the preheader.
+
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+
+ const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
+ MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
+ NewPH->insert(NewPH->end(), NewPN);
+
+ unsigned PR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(PR);
+ unsigned NewPR = MRI->createVirtualRegister(RC);
+ NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
+
+ // Copy all non-latch operands of a header's PHI node to the newly
+ // created PHI node in the preheader.
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ unsigned PredR = PN->getOperand(i).getReg();
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB == Latch)
+ continue;
+
+ NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
+ NewPN->addOperand(MachineOperand::CreateMBB(PredB));
+ }
+
+ // Remove copied operands from the old PHI node and add the value
+ // coming from the preheader's PHI.
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
+ if (PredB != Latch) {
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ }
+ PN->addOperand(MachineOperand::CreateReg(NewPR, false));
+ PN->addOperand(MachineOperand::CreateMBB(NewPH));
+ }
+
} else {
- // Trip count is an immediate.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
- .addImm(MII->getOperand(1).getImm());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
- .addReg(Scratch);
- }
- // Then, set the SA0 with the loop start address.
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
- .addMBB(MII->getOperand(0).getMBB());
- BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+ assert(Header->pred_size() == 2);
+
+ // The header has only two predecessors, but the non-latch predecessor
+ // is not a preheader (e.g. it has other successors, etc.)
+ // In such a case we don't need any extra PHI nodes in the new preheader,
+ // all we need is to adjust existing PHIs in the header to now refer to
+ // the new preheader.
+ for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
+ I != E && I->isPHI(); ++I) {
+ MachineInstr *PN = &*I;
+ for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
+ MachineOperand &MO = PN->getOperand(i+1);
+ if (MO.getMBB() != Latch)
+ MO.setMBB(NewPH);
+ }
+ }
+ }
+
+ // "Reroute" the CFG edges to link in the new preheader.
+ // If any of the predecessors falls through to the header, insert a branch
+ // to the new preheader in that place.
+ SmallVector<MachineOperand,1> Tmp2;
+ SmallVector<MachineOperand,1> EmptyCond;
+
+ TB = FB = 0;
+
+ for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
+ MachineBasicBlock *PB = *I;
+ if (PB != Latch) {
+ Tmp2.clear();
+ bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
+ (void)NotAnalyzed; // suppress compiler warning
+ assert (!NotAnalyzed && "Should be analyzable!");
+ if (TB != Header && (Tmp2.empty() || FB != Header))
+ TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ PB->ReplaceUsesOfBlockWith(Header, NewPH);
+ }
+ }
+
+ // It can happen that the latch block will fall through into the header.
+ // Insert an unconditional branch to the header.
+ TB = FB = 0;
+ bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
+ (void)LatchNotAnalyzed; // suppress compiler warning
+ assert (!LatchNotAnalyzed && "Should be analyzable!");
+ if (!TB && !FB)
+ TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+
+ // Finally, the branch from the preheader to the header.
+ TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ NewPH->addSuccessor(Header);
+
+ return NewPH;
}
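
The PHI rewiring above can be pictured with a simple model: incoming values from the extra predecessors are merged by a new PHI in the preheader, and the header PHI keeps only the latch entry plus the preheader PHI's result. A schematic standalone sketch using (value, predecessor) string pairs instead of real MachineOperands:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    typedef std::pair<std::string, std::string> Incoming;   // (value, pred block)
    typedef std::vector<Incoming> PhiNode;

    // Split a header PHI for a newly created preheader: non-latch entries move
    // into a preheader PHI, and the header PHI keeps the latch entry plus the
    // preheader PHI's result.
    static PhiNode splitForPreheader(PhiNode &HeaderPhi, const std::string &Latch,
                                     const std::string &Preheader,
                                     const std::string &NewVal) {
      PhiNode PreheaderPhi, Remaining;
      for (size_t i = 0; i < HeaderPhi.size(); ++i) {
        if (HeaderPhi[i].second == Latch)
          Remaining.push_back(HeaderPhi[i]);
        else
          PreheaderPhi.push_back(HeaderPhi[i]);   // rerouted through preheader
      }
      Remaining.push_back(std::make_pair(NewVal, Preheader));
      HeaderPhi = Remaining;
      return PreheaderPhi;
    }

    int main() {
      // header: %v = PHI [%a, bb1], [%b, bb2], [%n, latch]
      PhiNode HeaderPhi;
      HeaderPhi.push_back(std::make_pair("%a", "bb1"));
      HeaderPhi.push_back(std::make_pair("%b", "bb2"));
      HeaderPhi.push_back(std::make_pair("%n", "latch"));

      PhiNode PreheaderPhi =
          splitForPreheader(HeaderPhi, "latch", "preheader", "%p");

      // preheader: %p = PHI [%a, bb1], [%b, bb2]
      // header:    %v = PHI [%n, latch], [%p, preheader]
      assert(PreheaderPhi.size() == 2 && HeaderPhi.size() == 2);
      assert(HeaderPhi[1] == std::make_pair(std::string("%p"),
                                            std::string("preheader")));
      return 0;
    }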
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index db292f2..3a1c48b 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -15,18 +15,29 @@
#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-
using namespace llvm;
+static
+cl::opt<unsigned>
+MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
+ cl::Hidden, cl::init(2),
+ cl::desc("Maximum number of uses of a global address such that we still us a"
+ "constant extended instruction"));
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
+namespace llvm {
+ void initializeHexagonDAGToDAGISelPass(PassRegistry&);
+}
+
//===--------------------------------------------------------------------===//
/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
/// instructions for SelectionDAG operations.
@@ -40,19 +51,24 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
// Keep a reference to HexagonTargetMachine.
HexagonTargetMachine& TM;
const HexagonInstrInfo *TII;
-
+ DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
- : SelectionDAGISel(targetmachine),
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(targetmachine, OptLevel),
Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
TM(targetmachine),
TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
-
+ initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
+ bool hasNumUsesBelowThresGA(SDNode *N) const;
SDNode *Select(SDNode *N);
// Complex Pattern Selectors.
+ inline bool foldGlobalAddress(SDValue &N, SDValue &R);
+ inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
+ bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
@@ -97,7 +113,14 @@ public:
SDNode *SelectAdd(SDNode *N);
bool isConstExtProfitable(SDNode *N) const;
- // Include the pieces autogenerated from the target description.
+// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+// [1..128], used in cmpb.gtu instructions.
+inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+}
+
+// Include the pieces autogenerated from the target description.
#include "HexagonGenDAGISel.inc"
};
} // end anonymous namespace
@@ -106,10 +129,23 @@ public:
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
- return new HexagonDAGToDAGISel(TM);
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new HexagonDAGToDAGISel(TM, OptLevel);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "hexagon-isel",
+ &SelectionDAGISel::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
}
+
static bool IsS11_0_Offset(SDNode * S) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
@@ -608,8 +644,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = { Value, Base,
- CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -1519,3 +1555,69 @@ bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
return (UseCount <= 1);
}
+
+//===--------------------------------------------------------------------===//
+// Return 'true' if the use count of the global address is below the threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
+ assert(N->getOpcode() == ISD::TargetGlobalAddress &&
+ "Expecting a target global address");
+
+ // Always try to fold the address.
+ if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ return true;
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
+ GlobalAddressUseCountMap.find(GA->getGlobal());
+
+ if (GI == GlobalAddressUseCountMap.end())
+ return false;
+
+ return GI->second <= MaxNumOfUsesForConstExtenders;
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the non GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, false);
+}
+
+//===--------------------------------------------------------------------===//
+// Return true if the GP-relative global address can be folded.
+//===--------------------------------------------------------------------===//
+inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
+ return foldGlobalAddressImpl(N, R, true);
+}
+
+//===--------------------------------------------------------------------===//
+// Fold the offset of the global address if the number of uses is below the threshold.
+//===--------------------------------------------------------------------===//
+bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
+ bool ShouldLookForGP) {
+ if (N.getOpcode() == ISD::ADD) {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
+ (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
+ GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
+
+ if (Const && GA &&
+ (GA->getOpcode() == ISD::TargetGlobalAddress)) {
+ if ((N0.getOpcode() == HexagonISD::CONST32) &&
+ !hasNumUsesBelowThresGA(GA))
+ return false;
+ R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
+ Const->getDebugLoc(),
+ N.getValueType(),
+ GA->getOffset() +
+ (uint64_t)Const->getSExtValue());
+ return true;
+ }
+ }
+ }
+ return false;
+}
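
The folding above is gated by a simple use-count heuristic: unless optimizing aggressively, the offset is folded into the global address only when the global has at most MaxNumOfUsesForConstExtenders uses, since each folded use may cost a constant-extender slot. Below is a self-contained plain-C++ analogue of that gate; all names are made up for illustration, and the real code keys its DenseMap on GlobalValue pointers rather than strings.

    #include <string>
    #include <unordered_map>

    struct ConstExtenderGate {
      std::unordered_map<std::string, unsigned> UseCount; // per-global use counts
      unsigned MaxUses = 2;     // default of -ga-max-num-uses-for-constant-extenders
      bool Aggressive = false;  // corresponds to CodeGenOpt::Aggressive

      bool shouldFold(const std::string &Global) const {
        if (Aggressive)
          return true;                  // always fold at the highest opt level
        auto It = UseCount.find(Global);
        if (It == UseCount.end())
          return false;                 // unknown use count: stay conservative
        return It->second <= MaxUses;   // fold only for rarely used globals
      }
    };

With the default threshold of 2, a global referenced three or more times keeps its unfolded form, so its address can be materialized once and shared instead of extending every user.
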
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 16cec5c..fac931a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -304,15 +304,9 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values of ISD::RET
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
@@ -321,12 +315,17 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
@@ -1016,8 +1015,8 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- HexagonTargetObjectFile &TLOF =
- (HexagonTargetObjectFile&)getObjFileLowering();
+ const HexagonTargetObjectFile &TLOF =
+ static_cast<const HexagonTargetObjectFile &>(getObjFileLowering());
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
}
@@ -1053,8 +1052,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setPrefLoopAlignment(4);
// Limits for inline expansion of memcpy/memmove
- maxStoresPerMemcpy = 6;
- maxStoresPerMemmove = 6;
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
//
// Library calls for unsupported operations
@@ -1364,11 +1363,18 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 5a415eb..65dab85 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -52,6 +52,8 @@ namespace llvm {
WrapperCP,
WrapperCombineII,
WrapperCombineRR,
+ WrapperCombineRI_V4,
+ WrapperCombineIR_V4,
WrapperPackhl,
WrapperSplatB,
WrapperSplatH,
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 71c620b..587fa7d 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -13,19 +13,19 @@
// *** Must match HexagonBaseInfo.h ***
//===----------------------------------------------------------------------===//
-class Type<bits<5> t> {
+class IType<bits<5> t> {
bits<5> Value = t;
}
-def TypePSEUDO : Type<0>;
-def TypeALU32 : Type<1>;
-def TypeCR : Type<2>;
-def TypeJR : Type<3>;
-def TypeJ : Type<4>;
-def TypeLD : Type<5>;
-def TypeST : Type<6>;
-def TypeSYSTEM : Type<7>;
-def TypeXTYPE : Type<8>;
-def TypeMARKER : Type<31>;
+def TypePSEUDO : IType<0>;
+def TypeALU32 : IType<1>;
+def TypeCR : IType<2>;
+def TypeJR : IType<3>;
+def TypeJ : IType<4>;
+def TypeLD : IType<5>;
+def TypeST : IType<6>;
+def TypeSYSTEM : IType<7>;
+def TypeXTYPE : IType<8>;
+def TypeENDLOOP: IType<31>;
// Maintain list of valid subtargets for each instruction.
class SubTarget<bits<4> value> {
@@ -44,8 +44,8 @@ def HasV5SubT : SubTarget<0x8>;
def NoV5SubT : SubTarget<0x7>;
// Addressing modes for load/store instructions
-class AddrModeType<bits<4> value> {
- bits<4> Value = value;
+class AddrModeType<bits<3> value> {
+ bits<3> Value = value;
}
def NoAddrMode : AddrModeType<0>; // No addressing mode
@@ -55,14 +55,35 @@ def BaseImmOffset : AddrModeType<3>; // Indirect with offset
def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+class MemAccessSize<bits<3> value> {
+ bits<3> Value = value;
+}
+
+def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
+def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
+def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
+def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
+def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd).
+
+
//===----------------------------------------------------------------------===//
// Intruction Class Declaration +
//===----------------------------------------------------------------------===//
-class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr, InstrItinClass itin, Type type> : Instruction {
- field bits<32> Inst;
+class OpcodeHexagon {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<2> IParse = 0; // Parse bits.
+
+ let Inst{31-28} = IClass;
+ let Inst{15-14} = IParse;
+
+ bits<1> zero = 0;
+}
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, IType type>
+ : Instruction, OpcodeHexagon {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -73,48 +94,63 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let Itinerary = itin;
let Size = 4;
- // *** Must match HexagonBaseInfo.h ***
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
// Instruction type according to the ISA.
- Type HexagonType = type;
- let TSFlags{4-0} = HexagonType.Value;
+ IType Type = type;
+ let TSFlags{4-0} = Type.Value;
+
// Solo instructions, i.e., those that cannot be in a packet with others.
- bits<1> isHexagonSolo = 0;
- let TSFlags{5} = isHexagonSolo;
+ bits<1> isSolo = 0;
+ let TSFlags{5} = isSolo;
+
// Predicated instructions.
bits<1> isPredicated = 0;
let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{7} = isPredicatedNew;
-
- // Stores that can be newified.
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<2> opNewBits = 0;
+ let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
bits<1> isNVStorable = 0;
- let TSFlags{8} = isNVStorable;
-
- // New-value store instructions.
+ let TSFlags{16} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{9} = isNVStore;
+ let TSFlags{17} = isNVStore; // New-value store insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{10} = isExtendable; // Insn may be extended.
+ let TSFlags{18} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{11} = isExtended; // Insn must be extended.
+ let TSFlags{19} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{14-12} = opExtendable; // Which operand may be extended.
+ let TSFlags{22-20} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{15} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{23} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{20-16} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending.
// If an instruction is valid on a subtarget (v2-v5), set the corresponding
// bit from validSubTargets. v2 is the least significant bit.
// By default, instruction is valid on all subtargets.
SubTarget validSubTargets = HasV2SubT;
- let TSFlags{24-21} = validSubTargets.Value;
+ let TSFlags{32-29} = validSubTargets.Value;
- // Addressing mode for load/store instrutions.
+ // Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{28-25} = addrMode.Value;
+ let TSFlags{35-33} = addrMode.Value;
+
+ // Memory access size for mem access instructions (load/store)
+ MemAccessSize accessSize = NoMemAccess;
+ let TSFlags{38-36} = accessSize.Value;
// Fields used for relation models.
string BaseOpcode = "";
@@ -124,6 +160,11 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string InputType = ""; // Input is "imm" or "reg" type.
string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
string isFloat = "false"; // Set to "true" for the floating-point load/store.
+ string isBrTaken = ""; // Set to "true"/"false" for jump instructions
+
+ let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"),
+ "");
+ let PNewValue = !if(isPredicatedNew, "new", "");
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
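
Several fields now sit above bit 31 (validSubTargets at 32-29, addrMode at 35-33, accessSize at 38-36), so the full 64-bit width of TSFlags is actually used. A sketch of how such fields are typically decoded on the MC side; the positions and widths come from the hunk above, but the enum and helper names here are invented for illustration rather than taken from HexagonBaseInfo.h.

    #include <cstdint>

    enum : uint64_t {
      TypePos     = 0,  TypeMask     = 0x1f, // TSFlags{4-0}:   instruction type
      AddrModePos = 33, AddrModeMask = 0x7,  // TSFlags{35-33}: addressing mode
      AccessPos   = 36, AccessMask   = 0x7   // TSFlags{38-36}: mem access size
    };

    static inline unsigned getType(uint64_t TSFlags) {
      return (TSFlags >> TypePos) & TypeMask;
    }
    static inline unsigned getAddrMode(uint64_t TSFlags) {
      return (TSFlags >> AddrModePos) & AddrModeMask;
    }
    static inline unsigned getMemAccessSize(uint64_t TSFlags) {
      return (TSFlags >> AccessPos) & AccessMask;
    }
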
@@ -134,187 +175,143 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
-class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayLoad = 1;
-}
+let mayLoad = 1 in
+class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
-class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
- let mayStore = 1;
-}
+class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-// SYSTEM Instruction Class in V4 can take SLOT0 only
-// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
-class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+let mayStore = 1 in
+class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
-class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only
+// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
-class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
+class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<16> imm16;
- bits<16> imm16_2;
-}
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
-class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MInst<outs, ins, asmstr, pattern, cstr>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
-class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
-// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
-}
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : SInst<outs, ins, asmstr, pattern, cstr>;
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
- bits<16> imm16;
-}
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
- bits<5> rs;
- bits<5> pu; // Predicate register
-}
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
-class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
- bits<5> rs;
- bits<10> imm10;
-}
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
-class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
- let isCodeGenOnly = 1;
- let isPseudo = 1;
-}
+let isCodeGenOnly = 1, isPseudo = 1 in
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr="">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
//===----------------------------------------------------------------------===//
// Intruction Classes Definitions -
@@ -324,75 +321,52 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
//
// ALU32 patterns
//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
-}
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU32Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
//
// ALU64 patterns.
//
-class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
-}
-
-class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
- : ALU64Type<outs, ins, asmstr, pattern> {
- let rt{0-4} = 0;
-}
-
-// J Type Instructions.
-class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JType<outs, ins, asmstr, pattern> {
-}
-
-// JR type Instructions.
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : JRType<outs, ins, asmstr, pattern> {
-}
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr = "">
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
-class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : STInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayStore = 1;
-}
+let mayStore = 1 in
+class STInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : STInst<outs, ins, asmstr, pattern, cstr>;
// Post increment LD Instruction.
-class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
-
-class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : LDInstPost<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
- let mayLoad = 1;
-}
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1 in
+class LDInst2PI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : LDInst<outs, ins, asmstr, pattern, cstr>;
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 05f1e23..9fda0da 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,9 +17,9 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : Type<9>;
-def TypeNV : Type<10>;
-def TypePREFIX : Type<30>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypePREFIX : IType<30>;
//----------------------------------------------------------------------------//
// Intruction Classes Definitions +
@@ -28,36 +28,38 @@ def TypePREFIX : Type<30>;
//
// NV type instructions.
//
-class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<13> imm13;
-}
+class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>;
+
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Definition of Post increment new value store.
-class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
- bits<5> rd;
- bits<5> rs;
- bits<5> rt;
- bits<13> imm13;
-}
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
// Post increment ST Instruction.
-class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr>
- : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
- let rt{0-4} = 0;
-}
+let mayStore = 1 in
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+// New-value conditional branch.
+class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : NVInst<outs, ins, asmstr, pattern, cstr>;
+
+let mayLoad = 1, mayStore = 1 in
+class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>;
-class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
- bits<5> rd;
- bits<5> rs;
- bits<6> imm6;
-}
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "">
+ : MEMInst<outs, ins, asmstr, pattern, cstr>;
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 3b1ae09..d30cdda 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -305,6 +305,88 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUri_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::CMPEHexagon4rr:
+ case Hexagon::CMPEQrr:
+ case Hexagon::CMPGT64rr:
+ case Hexagon::CMPGTU64rr:
+ case Hexagon::CMPGTUrr:
+ case Hexagon::CMPGTrr:
+ case Hexagon::CMPbEQrr_sbsb_V4:
+ case Hexagon::CMPbEQrr_ubub_V4:
+ case Hexagon::CMPbGTUrr_V4:
+ case Hexagon::CMPbGTrr_V4:
+ case Hexagon::CMPhEQrr_shl_V4:
+ case Hexagon::CMPhEQrr_xor_V4:
+ case Hexagon::CMPhGTUrr_V4:
+ case Hexagon::CMPhGTrr_shl_V4:
+ case Hexagon::CMPLTUrr:
+ case Hexagon::CMPLTrr:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::CMPEQri:
+ case Hexagon::CMPGTUri:
+ case Hexagon::CMPGTri:
+ case Hexagon::CMPbEQri_V4:
+ case Hexagon::CMPbGTUri_V4:
+ case Hexagon::CMPhEQri_V4:
+ case Hexagon::CMPhGTUri_V4:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -344,6 +426,18 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
return;
}
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_RsPd), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ Hexagon::PredRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_PdRs), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
llvm_unreachable("Unimplemented");
}
@@ -608,30 +702,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
case Hexagon::STriw_abs_setimm_V4:
// V4 global address load.
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
case Hexagon::LDd_GP_cPt_V4 :
case Hexagon::LDd_GP_cNotPt_V4 :
case Hexagon::LDd_GP_cdnPt_V4 :
@@ -658,22 +728,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
case Hexagon::LDw_GP_cdnNotPt_V4 :
// V4 global address store.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STd_GP_cdnPt_V4 :
@@ -692,18 +746,6 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
case Hexagon::STw_GP_cdnNotPt_V4 :
// V4 predicated global address new value store.
- case Hexagon::STrib_GP_cPt_nv_V4 :
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- case Hexagon::STrib_GP_cdnPt_nv_V4 :
- case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
- case Hexagon::STrih_GP_cPt_nv_V4 :
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- case Hexagon::STrih_GP_cdnPt_nv_V4 :
- case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
- case Hexagon::STriw_GP_cPt_nv_V4 :
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- case Hexagon::STriw_GP_cdnPt_nv_V4 :
- case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
case Hexagon::STb_GP_cPt_nv_V4 :
case Hexagon::STb_GP_cNotPt_nv_V4 :
case Hexagon::STb_GP_cdnPt_nv_V4 :
@@ -1095,7 +1137,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrib_indexed_nv_V4:
case Hexagon::STrib_indexed_shl_nv_V4:
case Hexagon::STrib_shl_nv_V4:
- case Hexagon::STrib_GP_nv_V4:
case Hexagon::STb_GP_nv_V4:
case Hexagon::POST_STbri_nv_V4:
case Hexagon::STrib_cPt_nv_V4:
@@ -1118,10 +1159,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STb_GP_cNotPt_nv_V4:
case Hexagon::STb_GP_cdnPt_nv_V4:
case Hexagon::STb_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
case Hexagon::STrib_abs_nv_V4:
case Hexagon::STrib_abs_cPt_nv_V4:
case Hexagon::STrib_abs_cdnPt_nv_V4:
@@ -1138,7 +1175,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrih_indexed_nv_V4:
case Hexagon::STrih_indexed_shl_nv_V4:
case Hexagon::STrih_shl_nv_V4:
- case Hexagon::STrih_GP_nv_V4:
case Hexagon::STh_GP_nv_V4:
case Hexagon::POST_SThri_nv_V4:
case Hexagon::STrih_cPt_nv_V4:
@@ -1161,10 +1197,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STh_GP_cNotPt_nv_V4:
case Hexagon::STh_GP_cdnPt_nv_V4:
case Hexagon::STh_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
case Hexagon::STrih_abs_nv_V4:
case Hexagon::STrih_abs_cPt_nv_V4:
case Hexagon::STrih_abs_cdnPt_nv_V4:
@@ -1181,7 +1213,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STriw_indexed_nv_V4:
case Hexagon::STriw_indexed_shl_nv_V4:
case Hexagon::STriw_shl_nv_V4:
- case Hexagon::STriw_GP_nv_V4:
case Hexagon::STw_GP_nv_V4:
case Hexagon::POST_STwri_nv_V4:
case Hexagon::STriw_cPt_nv_V4:
@@ -1204,10 +1235,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STw_GP_cNotPt_nv_V4:
case Hexagon::STw_GP_cdnPt_nv_V4:
case Hexagon::STw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
case Hexagon::STriw_abs_nv_V4:
case Hexagon::STriw_abs_cPt_nv_V4:
case Hexagon::STriw_abs_cdnPt_nv_V4:
@@ -1500,26 +1527,11 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::JMPR_cPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_cPt_V4:
- return Hexagon::LDrid_indexed_cNotPt_V4;
- case Hexagon::LDrid_indexed_cNotPt_V4:
- return Hexagon::LDrid_indexed_cPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4:
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4:
return Hexagon::LDrid_indexed_shl_cPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4:
- return Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDrib_indexed_cNotPt_V4:
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cPt_V4:
- return Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_cNotPt_V4:
- return Hexagon::LDriub_indexed_cPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4:
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4:
@@ -1530,16 +1542,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriub_indexed_shl_cNotPt_V4:
return Hexagon::LDriub_indexed_shl_cPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4:
- return Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDrih_indexed_cNotPt_V4:
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cPt_V4:
- return Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cNotPt_V4:
- return Hexagon::LDriuh_indexed_cPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4:
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4:
@@ -1550,11 +1552,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
return Hexagon::LDriuh_indexed_shl_cPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4:
- return Hexagon::LDriw_indexed_cNotPt_V4;
- case Hexagon::LDriw_indexed_cNotPt_V4:
- return Hexagon::LDriw_indexed_cPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4:
return Hexagon::LDriw_indexed_shl_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_cNotPt_V4:
@@ -1680,26 +1677,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::STw_GP_cNotPt_V4:
return Hexagon::STw_GP_cPt_V4;
- case Hexagon::STrid_GP_cPt_V4:
- return Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cNotPt_V4:
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
// Load.
case Hexagon::LDrid_cPt:
return Hexagon::LDrid_cNotPt;
@@ -1965,75 +1942,26 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::JMPR_cNotPt;
// V4 indexed+scaled load.
- case Hexagon::LDrid_indexed_V4:
- return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
- Hexagon::LDrid_indexed_cNotPt_V4;
case Hexagon::LDrid_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_V4:
- return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
- Hexagon::LDrib_indexed_cNotPt_V4;
- case Hexagon::LDriub_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
- Hexagon::LDriub_indexed_cNotPt_V4;
case Hexagon::LDrib_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
Hexagon::LDrib_indexed_shl_cNotPt_V4;
case Hexagon::LDriub_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
- Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_V4:
- return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
- Hexagon::LDrih_indexed_cNotPt_V4;
- case Hexagon::LDriuh_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
- Hexagon::LDriuh_indexed_cNotPt_V4;
case Hexagon::LDrih_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
Hexagon::LDrih_indexed_shl_cNotPt_V4;
case Hexagon::LDriuh_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_ae_indexed_shl_V4:
- return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
- Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_V4:
- return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
- Hexagon::LDriw_indexed_cNotPt_V4;
case Hexagon::LDriw_indexed_shl_V4:
return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
Hexagon::LDriw_indexed_shl_cNotPt_V4;
// V4 Load from global address
- case Hexagon::LDrid_GP_V4:
- return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 :
- Hexagon::LDrid_GP_cNotPt_V4;
- case Hexagon::LDrib_GP_V4:
- return !invertPredicate ? Hexagon::LDrib_GP_cPt_V4 :
- Hexagon::LDrib_GP_cNotPt_V4;
- case Hexagon::LDriub_GP_V4:
- return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 :
- Hexagon::LDriub_GP_cNotPt_V4;
- case Hexagon::LDrih_GP_V4:
- return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 :
- Hexagon::LDrih_GP_cNotPt_V4;
- case Hexagon::LDriuh_GP_V4:
- return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 :
- Hexagon::LDriuh_GP_cNotPt_V4;
- case Hexagon::LDriw_GP_V4:
- return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 :
- Hexagon::LDriw_GP_cNotPt_V4;
-
case Hexagon::LDd_GP_V4:
return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 :
Hexagon::LDd_GP_cNotPt_V4;
@@ -2116,19 +2044,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::STrid_indexed_shl_cNotPt_V4;
// V4 Store to global address
- case Hexagon::STrid_GP_V4:
- return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 :
- Hexagon::STrid_GP_cNotPt_V4;
- case Hexagon::STrib_GP_V4:
- return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 :
- Hexagon::STrib_GP_cNotPt_V4;
- case Hexagon::STrih_GP_V4:
- return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 :
- Hexagon::STrih_GP_cNotPt_V4;
- case Hexagon::STriw_GP_V4:
- return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 :
- Hexagon::STriw_GP_cNotPt_V4;
-
case Hexagon::STd_GP_V4:
return !invertPredicate ? Hexagon::STd_GP_cPt_V4 :
Hexagon::STd_GP_cNotPt_V4;
@@ -2215,38 +2130,141 @@ PredicateInstruction(MachineInstr *MI,
assert (isPredicable(MI) && "Expected predicable instruction");
bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
(Cond[0].getImm() == 0));
+
+ // This will change MI's opcode to its predicate version.
+ // However, its operand list is still the old one, i.e. the
+ // non-predicate one.
MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
- //
- // This assumes that the predicate is always the first operand
- // in the set of inputs.
- //
- MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
- int oper;
- for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
- MachineOperand MO = MI->getOperand(oper);
- if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
- break;
- }
- if (MO.isReg()) {
- MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
- MO.isImplicit(), MO.isKill(),
- MO.isDead(), MO.isUndef(),
- MO.isDebug());
- } else if (MO.isImm()) {
- MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
- } else {
- llvm_unreachable("Unexpected operand type");
+ int oper = -1;
+ unsigned int GAIdx = 0;
+
+ // Indicates whether the current MI has a GlobalAddress operand
+ bool hasGAOpnd = false;
+ std::vector<MachineOperand> tmpOpnds;
+
+ // Indicates whether we need to shift operands to the right.
+ bool needShift = true;
+
+ // The predicate is ALWAYS the FIRST input operand !!!
+ if (MI->getNumOperands() == 0) {
+ // The non-predicate version of MI does not take any operands,
+ // i.e. no outs and no ins. In this condition, the predicate
+ // operand will be directly placed at Operands[0]. No operand
+ // shift is needed.
+ // Example: BARRIER
+ needShift = false;
+ oper = -1;
+ }
+ else if ( MI->getOperand(MI->getNumOperands()-1).isReg()
+ && MI->getOperand(MI->getNumOperands()-1).isDef()
+ && !MI->getOperand(MI->getNumOperands()-1).isImplicit()) {
+ // The non-predicate version of MI does not have any input operands.
+ // In this condition, we extend the length of Operands[] by one and
+ // copy the original last operand to the newly allocated slot.
+ // At this moment, it is just a place holder. Later, we will put
+ // predicate operand directly into it. No operand shift is needed.
+ // Example: r0=BARRIER (this is a faked insn used here for illustration)
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ needShift = false;
+ oper = MI->getNumOperands() - 2;
+ }
+ else {
+ // We need to right shift all input operands by one. Duplicate the
+ // last operand into the newly allocated slot.
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ }
+
+ if (needShift)
+ {
+ // Operands[ MI->getNumOperands() - 2 ] has been copied into
+ // Operands[ MI->getNumOperands() - 1 ], so we start from
+ // Operands[ MI->getNumOperands() - 3 ].
+ // oper is a signed int.
+ // It is ok if "MI->getNumOperands()-3" is -3, -2, or -1.
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper)
+ {
+ MachineOperand &MO = MI->getOperand(oper);
+
+ // Opnd[0] Opnd[1] Opnd[2] Opnd[3] Opnd[4] Opnd[5] Opnd[6] Opnd[7]
+ // <Def0> <Def1> <Use0> <Use1> <ImpDef0> <ImpDef1> <ImpUse0> <ImpUse1>
+ // The predicate operand is inserted between the explicit defs
+ // (<Def0>, <Def1>) and the first use (<Use0>) shown above.
+ if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+ break;
+ }
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ }
+ else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ }
+ else if (MO.isGlobal()) {
+ // MI can not have more than one GlobalAddress operand.
+ assert(hasGAOpnd == false && "MI can only have one GlobalAddress opnd");
+
+ // There is no member function called "ChangeToGlobalAddress" in the
+ // MachineOperand class (not like "ChangeToRegister" and
+ // "ChangeToImmediate"). So we have to remove them from Operands[] list
+ // first, and then add them back after we have inserted the predicate
+ // operand. tmpOpnds[] is to remember these operands before we remove
+ // them.
+ tmpOpnds.push_back(MO);
+
+ // Operands[oper] is a GlobalAddress operand;
+ // Operands[oper+1] has been copied into Operands[oper+2];
+ hasGAOpnd = true;
+ GAIdx = oper;
+ continue;
+ }
+ else {
+ assert(false && "Unexpected operand type");
+ }
}
}
int regPos = invertJump ? 1 : 0;
MachineOperand PredMO = Cond[regPos];
+
+ // [oper] now points to the last explicit Def. Predicate operand must be
+ // located at [oper+1]. See diagram above.
+ // This assumes that the predicate is always the first operand,
+ // i.e. Operands[0+numResults], in the set of inputs.
+ // It would be better to assert this, but findFirstPredOperandIdx() returns
+ // -1 at this point, so there is no straightforward way to write the check.
+ if (oper < -1) oper = -1;
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
PredMO.isImplicit(), PredMO.isKill(),
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
+ if (hasGAOpnd)
+ {
+ unsigned int i;
+
+ // Operands[GAIdx] is the original GlobalAddress operand, which is
+ // already copied into tmpOpnds[0].
+ // Operands[GAIdx] now stores a copy of Operands[GAIdx-1]
+ // Operands[GAIdx+1] has already been copied into Operands[GAIdx+2],
+ // so we start from [GAIdx+2]
+ for (i = GAIdx + 2; i < MI->getNumOperands(); ++i)
+ tmpOpnds.push_back(MI->getOperand(i));
+
+ // Remove all operands in range [ (GAIdx+1) ... (MI->getNumOperands()-1) ]
+ // It is very important that we always remove from the end of Operands[]
+ // MI->getNumOperands() is at least 2 if program goes to here.
+ for (i = MI->getNumOperands() - 1; i > GAIdx; --i)
+ MI->RemoveOperand(i);
+
+ for (i = 0; i < tmpOpnds.size(); ++i)
+ MI->addOperand(tmpOpnds[i]);
+ }
+
return true;
}
@@ -2352,7 +2370,9 @@ isValidOffset(const int Opcode, const int Offset) const {
switch(Opcode) {
case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
case Hexagon::LDriw_f:
+ case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
assert((Offset % 4 == 0) && "Offset has incorrect alignment");
@@ -2360,8 +2380,10 @@ isValidOffset(const int Opcode, const int Offset) const {
(Offset <= Hexagon_MEMW_OFFSET_MAX);
case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
case Hexagon::LDrid_f:
case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
@@ -2435,6 +2457,9 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::LDriw_pred:
return true;
+ case Hexagon::LOOP0_i:
+ return isUInt<10>(Offset);
+
// INLINEASM is very special.
case Hexagon::INLINEASM:
return true;
@@ -2643,28 +2668,16 @@ isConditionalLoad (const MachineInstr* MI) const {
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cNotPt :
return QRI.Subtarget.hasV4TOps();
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
return QRI.Subtarget.hasV4TOps();
@@ -2747,14 +2760,6 @@ isConditionalStore (const MachineInstr* MI) const {
return QRI.Subtarget.hasV4TOps();
// V4 global address store before promoting to dot new.
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
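
The reworked PredicateInstruction() above boils down to one list operation: every explicit input is shifted one slot to the right and the predicate register becomes the first input, immediately after the explicit defs (global-address operands need the extra remove/re-add dance only because MachineOperand has no ChangeToGlobalAddress). A minimal stand-alone analogue of that reshuffle, using plain strings in place of MachineOperands:

    #include <cassert>
    #include <string>
    #include <vector>

    // Insert the predicate as the first input operand, i.e. right after the
    // explicit defs; all existing inputs shift one position to the right.
    static void insertPredicate(std::vector<std::string> &Ops, unsigned NumDefs,
                                const std::string &Pred) {
      assert(NumDefs <= Ops.size() && "more defs than operands");
      Ops.insert(Ops.begin() + NumDefs, Pred);
    }

    // Example: {"Rd", "Rs", "#8"} with NumDefs == 1 becomes
    //          {"Rd", "P0", "Rs", "#8"} after insertPredicate(Ops, 1, "P0").
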
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 29e3eb1..4e36dfb 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -66,6 +66,10 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const;
+
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
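For context, analyzeCompare() decomposes a compare instruction into the registers and immediate it tests so that later passes can reason about it. The sketch below models the reporting one might expect for a register/immediate compare; the exact SrcReg2/Mask/Value convention is an assumption drawn only from the declaration above, not from Hexagon's implementation:

// Hypothetical model of the analyzeCompare() outputs for "p0 = cmp.eq(r3, #42)".
#include <cassert>

struct CompareInfo {
  unsigned SrcReg, SrcReg2;
  int Mask, Value;
};

// Assumed convention: immediate compares report a zero second register and
// place the immediate in Value.
static CompareInfo analyzeCmpEqImm(unsigned Reg, int Imm) {
  return {Reg, 0u, ~0, Imm};
}

int main() {
  CompareInfo CI = analyzeCmpEqImm(/*r3*/ 3, 42);
  assert(CI.SrcReg == 3 && CI.SrcReg2 == 0 && CI.Value == 42);
  return 0;
}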
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 8b183b9..082772a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -808,7 +808,7 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
// JR +
//===----------------------------------------------------------------------===//
def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// Jump to address from register.
let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
@@ -1195,57 +1195,65 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// Multiply and use lower result.
// Rd=+mpyi(Rs,#u8)
-def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Ext:$src2),
"$dst =+ mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- u8ImmPred:$src2))]>;
+ u8ExtPred:$src2))]>;
// Rd=-mpyi(Rs,#u8)
-def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
"$dst =- mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- n8ImmPred:$src2))]>;
+ [(set (i32 IntRegs:$dst), (ineg (mul (i32 IntRegs:$src1),
+ u8ImmPred:$src2)))]>;
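The corrected MPYI_rin pattern now models "$dst =- mpyi($src1, #$src2)" as the negation of a product with an unsigned 8-bit immediate, (ineg (mul ...)), instead of a multiply by a negative immediate. A quick standalone check of that arithmetic (the helper name is illustrative only):

#include <cassert>
#include <cstdint>

// Rd =- mpyi(Rs, #u8)  ==  -(Rs * u8)
static int32_t mpyi_rin(int32_t Rs, uint8_t U8) { return -(Rs * (int32_t)U8); }

int main() {
  assert(mpyi_rin(7, 3) == -21);
  assert(mpyi_rin(-4, 5) == 20);
  return 0;
}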
// Rd=mpyi(Rs,#m9)
// s9 is NOT the same as m9 - but it works... so far.
// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
// depending on the value of m9. See Arch Spec.
-def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
+CextOpcode = "MPYI", InputType = "imm" in
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ImmPred:$src2))]>;
+ s9ExtPred:$src2))]>, ImmRegRel;
// Rd=mpyi(Rs,Rt)
+let CextOpcode = "MPYI", InputType = "reg" in
def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
"$dst = mpyi($src1, $src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ (i32 IntRegs:$src2)))]>, ImmRegRel;
// Rx+=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8,
+CextOpcode = "MPYI_acc", InputType = "imm" in
def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst += mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
- (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3),
+ (add (mul (i32 IntRegs:$src2), u8ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx+=mpyi(Rs,Rt)
+let CextOpcode = "MPYI_acc", InputType = "reg" in
def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += mpyi($src2, $src3)",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
// Rx-=mpyi(Rs,#u8)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 8 in
def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Ext:$src3),
"$dst -= mpyi($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u8ImmPred:$src3)))],
+ u8ExtPred:$src3)))],
"$src1 = $dst">;
// Multiply and use upper result.
@@ -1314,7 +1322,7 @@ def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
// Rxx-=mpyu(Rs,Rt)
def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst += mpyu($src2, $src3)",
+ "$dst -= mpyu($src2, $src3)",
[(set (i64 DoubleRegs:$dst),
(sub (i64 DoubleRegs:$src1),
(mul (i64 (anyext (i32 IntRegs:$src2))),
@@ -1322,37 +1330,43 @@ def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
"$src1 = $dst">;
+let InputType = "reg", CextOpcode = "ADD_acc" in
def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst += add($src2, $src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3)),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+InputType = "imm", CextOpcode = "ADD_acc" in
def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst += add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
- s8ImmPred:$src3),
+ s8_16ExtPred:$src3),
(i32 IntRegs:$src1)))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let CextOpcode = "SUB_acc", InputType = "reg" in
def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
IntRegs:$src2, IntRegs:$src3),
"$dst -= add($src2, $src3)",
[(set (i32 IntRegs:$dst),
(sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
- "$src1 = $dst">;
+ "$src1 = $dst">, ImmRegRel;
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "SUB_acc", InputType = "imm" in
def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
- IntRegs:$src2, s8Imm:$src3),
+ IntRegs:$src2, s8Ext:$src3),
"$dst -= add($src2, #$src3)",
[(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1),
(add (i32 IntRegs:$src2),
- s8ImmPred:$src3)))],
- "$src1 = $dst">;
+ s8_16ExtPred:$src3)))],
+ "$src1 = $dst">, ImmRegRel;
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
@@ -1405,35 +1419,71 @@ def STd_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STdri : STInstPI<(outs IntRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memd($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2),
- s4_3ImmPred:$offset))],
- "$src2 = $dst">;
+//===----------------------------------------------------------------------===//
+// Post increment store
+//===----------------------------------------------------------------------===//
-// if ([!]Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">;
-
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1, isPredicated = 1,
- isPredicated = 1 in
-def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1) memd($src3++#$offset) = $src2",
+multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3",
[],
- "$src3 = $dst">;
+ "$src2 = $dst">;
+}
+
+multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+let hasCtrlDep = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let hasCtrlDep = 1, BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME : STInst2PI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ #mnemonic#"($src1++#$offset) = $src2",
+ [],
+ "$src1 = $dst">;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+defm POST_STbri: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+
+let isNVStorable = 0 in
+defm POST_STdri: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm>, AddrModeRel;
+
+def : Pat<(post_truncsti8 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STbri IntRegs:$src2, s4_0ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_truncsti16 (i32 IntRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_SThri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i32 IntRegs:$src1), IntRegs:$src2, s4_2ImmPred:$offset),
+ (POST_STwri IntRegs:$src2, s4_1ImmPred:$offset, IntRegs:$src1)>;
+
+def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
+ s4_3ImmPred:$offset),
+ (POST_STdri IntRegs:$src2, s4_3ImmPred:$offset, DoubleRegs:$src1)>;
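The new ST_PostInc multiclass and the selection patterns above model post-increment stores: the value is stored at the current base, the base register is then advanced by the immediate, and the updated base is the instruction's result (hence the "$src1 = $dst" constraint). A minimal C++ model of memw(Rx++#off) = Rt, assuming a word-aligned offset (names are illustrative only):

#include <cassert>
#include <cstdint>

static uint32_t *post_store_w(uint32_t *Base, int Off, uint32_t Val) {
  *Base = Val;                                // store at the old base
  return Base + Off / (int)sizeof(uint32_t);  // return the incremented base
}

int main() {
  uint32_t Buf[4] = {0, 0, 0, 0};
  uint32_t *P = post_store_w(Buf, 8, 42);     // memw(r0++#8) = r1
  assert(Buf[0] == 42 && P == Buf + 2);
  return 0;
}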
//===----------------------------------------------------------------------===//
// multiclass for the store instructions with MEMri operand.
@@ -1595,32 +1645,6 @@ def STb_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-// memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2,
- s4Imm:$offset),
- "memb($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_0ImmPred:$offset))],
- "$src2 = $dst">;
-
-// if ([!]Pv) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-// if (!Pv) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
let neverHasSideEffects = 1 in
def STrih_GP : STInst2<(outs),
(ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
@@ -1636,31 +1660,6 @@ def STh_GP : STInst2<(outs),
Requires<[NoV4T]>;
// memh(Rx++#s4:1)=Rt.H
-// memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memh($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_1ImmPred:$offset))],
- "$src2 = $dst">;
-
-// if ([!]Pv) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-// if (!Pv) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
// Store word.
// Store predicate.
@@ -1684,32 +1683,6 @@ def STw_GP : STInst2<(outs),
[]>,
Requires<[NoV4T]>;
-let hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri : STInstPI<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
- "memw($src2++#$offset) = $src1",
- [(set IntRegs:$dst,
- (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2),
- s4_2ImmPred:$offset))],
- "$src2 = $dst">;
-
-// if ([!]Pv) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1, isPredicated = 1 in
-def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">;
-
-
-
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
def ALLOCFRAME : STInst2<(outs),
@@ -1912,7 +1885,7 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
[SDNPHasChain]>;
-let hasSideEffects = 1, isHexagonSolo = 1 in
+let hasSideEffects = 1, isSolo = 1 in
def BARRIER : SYSInst<(outs), (ins),
"barrier",
[(HexagonBARRIER)]>;
@@ -1987,9 +1960,9 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
+def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
}
// Support for generating global address.
@@ -2852,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
// i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
@@ -2889,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+// When there is an offset, prefer the pattern below over the pattern above.
+// The complexity of the pattern above is 13 (gleaned from HexagonGenDAGIsel.inc),
+// so the complexity below is set comfortably higher so that the pattern below is chosen.
+// If this is not done then we generate addresses such as
+// ********************************************
+// r1 = add (r0, #4)
+// r1 = memw(r1 + #0)
+// instead of
+// r1 = memw(r0 + #4)
+// ********************************************
+let AddedComplexity = 100 in
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
- (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
- (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
+ (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
+ Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 372de9a..e1b2f88 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -21,6 +21,17 @@ def IMMEXT_c : T_Immext<(ins calltarget:$imm)>;
def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>;
def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>;
+// Fold (add (CONST32 tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddr : ComplexPattern<i32, 1, "foldGlobalAddress", [], []>;
+
+// Fold (add (CONST32_GP tglobaladdr:$addr) <offset>) into a global address.
+def FoldGlobalAddrGP : ComplexPattern<i32, 1, "foldGlobalAddressGP", [], []>;
+
+def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr),
+ (HexagonCONST32 node:$addr), [{
+ return hasNumUsesBelowThresGA(N->getOperand(0).getNode());
+}]>;
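FoldGlobalAddr/FoldGlobalAddrGP and the use-count PatFrag above let instruction selection fold an (add (CONST32[_GP] @g), offset) into a single global-plus-offset address when the constant is not widely reused. The sketch below is only a conceptual model of that folding, with an assumed data type; it is not the actual SelectionDAG hook:

#include <cassert>
#include <cstdint>
#include <string>

struct Addr { std::string Global; int64_t Offset; };

// Conceptually: (add @g+Off0, Addend) -> @g+(Off0+Addend)
static Addr foldGlobalAddress(const Addr &GA, int64_t Addend) {
  return {GA.Global, GA.Offset + Addend};
}

int main() {
  Addr A = foldGlobalAddress({"g", 0}, 8);
  assert(A.Global == "g" && A.Offset == 8);
  return 0;
}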
+
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
// compiler)
@@ -251,6 +262,54 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
[]>,
Requires<[HasV4T]>;
+// Rd=cmp.eq(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpeqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Preserve the TSTBIT generation
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (i32 (MUX_ii (i1 (TSTBIT_rr (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ 1, 0))>;
+
+// This interfered with tstbit generation; the pattern above preserves it (see tstbit.ll).
+// Rd=cmp.ne(Rs,#s8)
+let validSubTargets = HasV4SubT, isExtendable = 1, opExtendable = 2,
+isExtentSigned = 1, opExtentBits = 8 in
+def V4_A4_rcmpneqi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Rd = !cmp.eq($Rs, #$s8)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ s8ExtPred:$s8)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.eq(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpeq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (seteq (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=cmp.ne(Rs,Rt)
+let validSubTargets = HasV4SubT in
+def V4_A4_rcmpneq : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = !cmp.eq($Rs, $Rt)",
+ [(set (i32 IntRegs:$Rd),
+ (i32 (zext (i1 (setne (i32 IntRegs:$Rs),
+ IntRegs:$Rt)))))]>,
+ Requires<[HasV4T]>;
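The four r-compare definitions above (V4_A4_rcmpeqi, V4_A4_rcmpneqi, V4_A4_rcmpeq, V4_A4_rcmpneq) all materialize a comparison result directly as an i32 0/1, i.e. zext of the i1 predicate. A standalone restatement of the immediate forms (helper names are illustrative):

#include <cassert>
#include <cstdint>

static int32_t rcmpeqi(int32_t Rs, int8_t S8)  { return Rs == S8 ? 1 : 0; }
static int32_t rcmpneqi(int32_t Rs, int8_t S8) { return Rs != S8 ? 1 : 0; }

int main() {
  assert(rcmpeqi(5, 5) == 1 && rcmpeqi(5, 6) == 0);
  assert(rcmpneqi(5, 5) == 0 && rcmpneqi(5, 6) == 1);
  return 0;
}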
//===----------------------------------------------------------------------===//
// ALU32 -
@@ -280,6 +339,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
[]>,
Requires<[HasV4T]>;
+def HexagonWrapperCombineRI_V4 :
+ SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineIR_V4 :
+ SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+
+def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
+ (COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
+ Requires<[HasV4T]>;
+
+def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
+ (COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
+ Requires<[HasV4T]>;
+
let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
@@ -299,120 +371,95 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
// These absolute set addressing mode instructions accept an immediate as
// an operand. We have duplicated these patterns to take a global address.
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memd($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memb($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memub($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memuh($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u6Imm:$addr),
- "$dst1 = memw($dst2=#$addr)",
+ (ins u0AlwaysExt:$addr),
+ "$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
+}
// The following patterns are defined for the absolute set addressing mode
// instructions which take a global address as an operand.
-let neverHasSideEffects = 1 in
+let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memd($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memb(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memb($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memub(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memub($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memuh(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memuh($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
// Rd=memw(Re=#U6)
-let neverHasSideEffects = 1 in
def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdress:$addr),
+ (ins globaladdressExt:$addr),
"$dst1 = memw($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
-
-// Load doubleword.
-//
-// Make sure that in post increment load, the first operand is always the post
-// increment operand.
-//
-// Rdd=memd(Rs+Rt<<#u2)
-// Special case pattern for indexed load without offset which is easier to
-// match. AddedComplexity of this pattern should be lower than base+offset load
-// and lower yet than the more generic version with offset/shift below
-// Similar approach is taken for all other base+index loads.
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memd($src1+$src2<<#0)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
+}
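Absolute-set addressing, used by the *_abs_setimm_V4 and *_abs_set_V4 loads above, performs the load from an absolute (now constant-extended) address and also writes that address into a second destination register, "$dst1 = memX($dst2=##addr)". A small C++ model of the two results (illustrative only):

#include <cassert>
#include <cstdint>

struct AbsSetLoad { uint32_t Value; const uint32_t *SetReg; };

// $dst1 = memw($dst2 = ##Addr)
static AbsSetLoad ld_abs_set(const uint32_t *Addr) {
  return {*Addr, Addr};
}

int main() {
  uint32_t G = 7;
  AbsSetLoad R = ld_abs_set(&G);
  assert(R.Value == 7 && R.SetReg == &G);
  return 0;
}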
// multiclass for load instructions with base + register offset
// addressing mode
@@ -512,534 +559,42 @@ def : Pat <(i64 (load (add IntRegs:$src1,
Requires<[HasV4T]>;
}
-//// Load doubleword conditionally.
-// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// Rdd=memd(Rt<<#u2+#U6)
-
-//// Load byte.
-// Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memb($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memub($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memub($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi8 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load byte conditionally.
-// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned byte conditionally.
-// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memb(Rt<<#u2+#U6)
-
-//// Load halfword
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (sextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (zextloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memuh($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
- "$dst=memuh($src1+$src2<<#$offset)",
- [(set (i32 IntRegs:$dst),
- (i32 (extloadi16 (add (i32 IntRegs:$src1),
- (shl (i32 IntRegs:$src2),
- u2ImmPred:$offset)))))]>,
- Requires<[HasV4T]>;
-
-//// Load halfword conditionally.
-// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-//// Load unsigned halfword conditionally.
-// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Rt<<#u2+#U6)
-
-//// Load word.
-// Load predicate: Fix for bug 5279.
-let neverHasSideEffects = 1 in
-def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst),
- (ins MEMri:$addr),
- "Error; should not emit",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Re=#U6)
-
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 10, isPredicable = 1 in
-def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst=memw($src1+$src2<<#0)",
- [(set (i32 IntRegs:$dst),
- (i32 (load (add (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))))]>,
- Requires<[HasV4T]>;
-
-//// Load word conditionally.
-// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
-// if (Pv) Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 15, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
- []>,
- Requires<[HasV4T]>;
-
-/// Load from global offset
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memd(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memd(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memd(##$global+$offset)",
- []>,
+// 'def pats' for load instructions with base + register offset addressing
+// and a zero immediate (shift) value.
+let AddedComplexity = 10 in {
+def : Pat <(i64 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memd(##$global+$offset)",
- []>,
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memd(##$global+$offset)",
- []>,
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memb(#$global+$offset)",
- []>,
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriub_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memb(##$global+$offset)",
- []>,
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memb(##$global+$offset)",
- []>,
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memb(##$global+$offset)",
- []>,
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriuh_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memb(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memub(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memub(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memuh(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memuh(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$global, u16Imm:$offset),
- "$dst=memw(#$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1) $dst=memw(##$global+$offset)",
- []>,
+def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))),
+ (LDriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2, 0)>,
Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if ($src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
- "if (!$src1.new) $dst=memw(##$global+$offset)",
- []>,
- Requires<[HasV4T]>;
-
+}
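The 'def Pat' block above replaces the deleted zero-shift *_indexed_V4 instructions by selecting the shifted-index form with a shift amount of 0, so plain register-plus-register addressing is just the #0 case of memX(Rs+Rt<<#u2). A quick standalone check of that address arithmetic (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// memX(Rs + Rt<<#u2)
static uint32_t addr_indexed_shl(uint32_t Rs, uint32_t Rt, unsigned U2) {
  return Rs + (Rt << U2);
}

int main() {
  assert(addr_indexed_shl(100, 5, 0) == 105);  // Rs + Rt<<#0 == Rs + Rt
  assert(addr_indexed_shl(100, 5, 2) == 120);
  return 0;
}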
let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst),
@@ -1364,82 +919,73 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
(i32 (LDw_GP_V4 tglobaladdr:$global))>,
Requires<[HasV4T]>;
-def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+// zext i1->i64
+def : Pat <(i64 (zext (i1 PredRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def : Pat <(i64 (zext (i32 IntRegs:$src1))),
+ (i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
+ Requires<[HasV4T]>;
+// zext i8->i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
+ s11_0ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
+ s11_0ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i16->i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// anyext i16->i64
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 20 in
+def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
+ s11_1ExtPred:$offset))),
+ (i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
+ s11_1ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
+
+// zext i32->i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
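On V4 the zero-extension and any-extension patterns above use COMBINE_Ir_V4 with a literal 0 high word rather than first materializing the 0 in a register: combine(#0, Rs) builds a 64-bit value whose upper half is zero, which is exactly zero-extension of the 32-bit low word. A standalone restatement (illustrative helper):

#include <cassert>
#include <cstdint>

// Rdd = combine(#Hi, Rs)
static uint64_t combine_ir(uint32_t Hi, uint32_t Lo) {
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  assert(combine_ir(0, 0xDEADBEEFu) == UINT64_C(0xDEADBEEF));  // zext i32->i64
  return 0;
}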
-// Map from load(globaladdress + x) -> memb(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memub(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+// anyext i32->i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memuh(#foo + x)
let AddedComplexity = 100 in
-def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
+def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[HasV4T]>;
-// Map from load(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-
-// Map from load(globaladdress + x) -> memuh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
-
-// Map from load(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset))),
- (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
- Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
@@ -1457,62 +1003,65 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
/// last operand.
///
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, u6Imm:$src2),
- "memd($dst1=#$src2) = $src1",
+ (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
+ "memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memb($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memh($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u6Imm:$src2),
- "memw($dst1=#$src2) = $src1",
+ (ins IntRegs:$src1, u0AlwaysExt:$src2),
+ "memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
+}
-// memd(Re=#U6)=Rtt
+// memd(Re=#U)=Rtt
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, globaladdress:$src2),
+ (ins DoubleRegs:$src1, globaladdressExt:$src2),
"memd($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memb(Re=#U6)=Rs
+// memb(Re=#U)=Rs
def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memb($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U6)=Rs
+// memh(Re=#U)=Rs
def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memh($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memw(Re=#U6)=Rs
+// memw(Re=#U)=Rs
def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdress:$src2),
+ (ins IntRegs:$src1, globaladdressExt:$src2),
"memw($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-
+}
// multiclass for store instructions with base + register offset addressing
// mode
@@ -1632,13 +1181,14 @@ def : Pat<(store (i64 DoubleRegs:$src4),
}
// memd(Ru<<#u2+#U6)=Rtt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
+validSubTargets = HasV4SubT in
def STrid_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
"memd($src1<<#$src2+#$src3) = $src4",
[(store (i64 DoubleRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memd(Rx++#s4:3)=Rtt
@@ -1652,34 +1202,12 @@ def STrid_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memd(#u6)=Rtt
// TODO: needs to be implemented.
-// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
-// if (Pv) memd(Rx++#s4:3)=Rtt
-// if (Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if ($src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memd(Rx++#s4:3)=Rtt
-// if (!Pv.new) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
- isPredicated = 1 in
-def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
- s4_3Imm:$offset),
- "if (!$src1.new) memd($src3++#$offset) = $src2",
- [],
- "$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
+//===----------------------------------------------------------------------===//
// multiclass for store instructions with base + immediate offset
// addressing mode and immediate stored value.
+// mem[bhw](Rx++#s4:3)=#s8
+// if ([!]Pv[.new]) mem[bhw](Rx++#s4:3)=#s6
+//===----------------------------------------------------------------------===//
multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
bit isPredNew> {
let PNewValue = !if(isPredNew, "new", "") in
@@ -1718,9 +1246,9 @@ multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
let addrMode = BaseImmOffset, InputType = "imm",
validSubTargets = HasV4SubT in {
- defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel;
- defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel;
- defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel;
+ defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel, PredNewRel;
+ defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel, PredNewRel;
+ defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel, PredNewRel;
}
let Predicates = [HasV4T], AddedComplexity = 10 in {
@@ -1741,13 +1269,14 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
Requires<[HasV4T]>;
// memb(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrib_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memb($src1<<#$src2+#$src3) = $src4",
[(truncstorei8 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memb(Rx++#s4:0:circ(Mu))=Rt
@@ -1757,30 +1286,6 @@ def STrib_shl_V4 : STInst<(outs),
// memb(gp+#u16:0)=Rt
-// Store byte conditionally.
-// if ([!]Pv[.new]) memb(#u6)=Rt
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
-// if (Pv) memb(Rx++#s4:0)=Rt
-// if (Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Rt
-// if (!Pv.new) memb(Rx++#s4:0)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store halfword.
// TODO: needs to be implemented
// memh(Re=#U6)=Rt.H
@@ -1795,13 +1300,14 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// memh(Ru<<#u2+#U6)=Rt.H
// memh(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STrih_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4",
[(truncstorei16 (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memh(Rx++#s4:1:circ(Mu))=Rt.H
@@ -1823,28 +1329,6 @@ def STrih_shl_V4 : STInst<(outs),
// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
// TODO: Needs to be implemented.
-// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
-// if (Pv) memh(Rx++#s4:1)=Rt
-// if (Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Rt
-// if (!Pv.new) memh(Rx++#s4:1)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
// Store word.
// memw(Re=#U6)=Rt
// TODO: Needs to be implemented.
@@ -1863,13 +1347,14 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
Requires<[HasV4T]>;
// memw(Ru<<#u2+#U6)=Rt
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STriw_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4",
[(store (i32 IntRegs:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u6ImmPred:$src3))]>,
+ u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
// memw(Rx++#s4:2)=Rt
@@ -1880,188 +1365,9 @@ def STriw_shl_V4 : STInst<(outs),
// memw(gp+#u16:2)=Rt
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
-// if (Pv) memw(Rx++#s4:2)=Rt
-// if (Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Rt
-// if (!Pv.new) memw(Rx++#s4:2)=Rt
-let hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-/// store to global address
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrid_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
- "memd(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if ($src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrid_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- DoubleRegs:$src2),
- "if (!$src1.new) memd(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrib_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STrih_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let isPredicable = 1, neverHasSideEffects = 1 in
-def STriw_GP_V4 : STInst2<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_GP_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2",
- []>,
- Requires<[HasV4T]>;
-
// memd(#global)=Rtt
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in
def STd_GP_V4 : STInst2<(outs),
(ins globaladdress:$global, DoubleRegs:$src),
"memd(#$global) = $src",
@@ -2069,7 +1375,8 @@ def STd_GP_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1,
+isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
def STd_GP_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
"if ($src1) memd(##$global) = $src2",
@@ -2077,7 +1384,6 @@ def STd_GP_cPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STd_GP_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
"if (!$src1) memd(##$global) = $src2",
@@ -2085,7 +1391,6 @@ def STd_GP_cNotPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STd_GP_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
"if ($src1.new) memd(##$global) = $src2",
@@ -2093,15 +1398,16 @@ def STd_GP_cdnPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memd(##global) = Rtt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STd_GP_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
"if (!$src1.new) memd(##$global) = $src2",
[]>,
Requires<[HasV4T]>;
+}
// memb(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STb_GP_V4 : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memb(#$global) = $src",
@@ -2109,7 +1415,8 @@ def STb_GP_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
+isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
def STb_GP_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memb(##$global) = $src2",
@@ -2117,7 +1424,6 @@ def STb_GP_cPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STb_GP_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memb(##$global) = $src2",
@@ -2125,7 +1431,6 @@ def STb_GP_cNotPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STb_GP_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memb(##$global) = $src2",
@@ -2133,15 +1438,16 @@ def STb_GP_cdnPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STb_GP_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memb(##$global) = $src2",
[]>,
Requires<[HasV4T]>;
+}
// memh(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STh_GP_V4 : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memh(#$global) = $src",
@@ -2149,7 +1455,8 @@ def STh_GP_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
+isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
def STh_GP_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memh(##$global) = $src2",
@@ -2157,7 +1464,6 @@ def STh_GP_cPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STh_GP_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memh(##$global) = $src2",
@@ -2165,7 +1471,6 @@ def STh_GP_cNotPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STh_GP_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memh(##$global) = $src2",
@@ -2173,15 +1478,16 @@ def STh_GP_cdnPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STh_GP_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memh(##$global) = $src2",
[]>,
Requires<[HasV4T]>;
+}
// memw(#global)=Rt
-let isPredicable = 1, neverHasSideEffects = 1 in
+let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1,
+validSubTargets = HasV4SubT in
def STw_GP_V4 : STInst2<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memw(#$global) = $src",
@@ -2189,7 +1495,8 @@ def STw_GP_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
+let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1,
+isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
def STw_GP_cPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memw(##$global) = $src2",
@@ -2197,7 +1504,6 @@ def STw_GP_cPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STw_GP_cNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memw(##$global) = $src2",
@@ -2205,7 +1511,6 @@ def STw_GP_cNotPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STw_GP_cdnPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memw(##$global) = $src2",
@@ -2213,12 +1518,12 @@ def STw_GP_cdnPt_V4 : STInst2<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(##global) = Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
def STw_GP_cdnNotPt_V4 : STInst2<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memw(##$global) = $src2",
[]>,
Requires<[HasV4T]>;
+}
// 64 bit atomic store
def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
@@ -2277,72 +1582,6 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
(STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
Requires<[HasV4T]>;
-def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i64 DoubleRegs:$src1)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset),
- (i32 IntRegs:$src1)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memd(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i64 DoubleRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i64 DoubleRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memb(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memh(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-// Map from store(globaladdress + x) -> memw(#foo + x)
-let AddedComplexity = 100 in
-def : Pat<(store (i32 IntRegs:$src1),
- (add (HexagonCONST32_GP tglobaladdr:$global),
- u16ImmPred:$offset)),
- (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
- (i32 IntRegs:$src1))>,
- Requires<[HasV4T]>;
-
-
-
//===----------------------------------------------------------------------===
// ST -
//===----------------------------------------------------------------------===
@@ -2456,35 +1695,72 @@ mayStore = 1 in {
}
// memb(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STrib_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memb($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
- "memb($src2++#$offset) = $src1.new",
+//===----------------------------------------------------------------------===//
+// Post increment store
+// mem[bhwd](Rx++#s4:[0123])=Nt.new
+//===----------------------------------------------------------------------===//
+
+multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"($src2++#$offset) = $src3.new",
[],
"$src2 = $dst">,
Requires<[HasV4T]>;
+}
+
+multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+let hasCtrlDep = 1, isNVStore = 1, neverHasSideEffects = 1 in
+multiclass ST_PostInc_nv<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
+ mnemonic#"($src1++#$offset) = $src2.new",
+ [],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in {
+ defm Pt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : ST_PostInc_Pred_nv<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+let validSubTargets = HasV4SubT in {
+defm POST_STbri: ST_PostInc_nv <"memb", "STrib", IntRegs, s4_0Imm>, AddrModeRel;
+defm POST_SThri: ST_PostInc_nv <"memh", "STrih", IntRegs, s4_1Imm>, AddrModeRel;
+defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
+}
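// The three defms above are intended to replace the hand-written post-increment
// new-value stores that used to follow (POST_STbri_nv_V4 and friends). Each
// defm expands into the unconditional form plus four predicated forms, e.g.
// for halfwords roughly:
//   memh(Rx++#s4:1) = Nt.new
//   if ([!]Pv) memh(Rx++#s4:1) = Nt.new
//   if ([!]Pv.new) memh(Rx++#s4:1) = Nt.new
// all tied together through BaseOpcode ("POST_STrih") for the AddrModeRel
// mapping.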
// memb(Rx++#s4:0:circ(Mu))=Nt.new
// memb(Rx++I:circ(Mu))=Nt.new
// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
-// memb(gp+#u16:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memb(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
// memb(#global)=Nt.new
let mayStore = 1, neverHasSideEffects = 1 in
def STb_GP_nv_V4 : NVInst_V4<(outs),
@@ -2493,73 +1769,20 @@ def STb_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
-// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
-// if (Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if ($src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
- "if (!$src1.new) memb($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
// memh(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STrih_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memh($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
- "memh($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memh(gp+#u16:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memh(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-
// memh(#global)=Nt.new
let mayStore = 1, neverHasSideEffects = 1 in
def STh_GP_nv_V4 : NVInst_V4<(outs),
@@ -2568,121 +1791,32 @@ def STh_GP_nv_V4 : NVInst_V4<(outs),
[]>,
Requires<[HasV4T]>;
-
-// if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new
-// if (Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if ($src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
- "if (!$src1.new) memh($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
// memw(Ru<<#u2+#U6)=Nt.new
-let mayStore = 1, AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
+isNVStore = 1, validSubTargets = HasV4SubT in
def STriw_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
"memw($src1<<#$src2+#$src3) = $src4.new",
[]>,
Requires<[HasV4T]>;
-// memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
-def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
- "memw($src2++#$offset) = $src1.new",
- [],
- "$src2 = $dst">,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2:circ(Mu))=Nt.new
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
// memw(Rx++Mu:brev)=Nt.new
// memw(gp+#u16:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_nv_V4 : NVInst_V4<(outs),
- (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
- "memw(#$global+$offset) = $src.new",
- []>,
- Requires<[HasV4T]>;
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
+validSubTargets = HasV4SubT in
def STw_GP_nv_V4 : NVInst_V4<(outs),
(ins globaladdress:$global, IntRegs:$src),
"memw(#$global) = $src.new",
[]>,
Requires<[HasV4T]>;
-// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
-// if (Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if ($src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
-let mayStore = 1, hasCtrlDep = 1,
- isPredicated = 1 in
-def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
- "if (!$src1.new) memw($src3++#$offset) = $src2.new",
- [],"$src3 = $dst">,
- Requires<[HasV4T]>;
-
-
-
// if (Pv) memb(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
+let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1,
+isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in {
def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memb(##$global) = $src2.new",
@@ -2690,7 +1824,6 @@ def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memb(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memb(##$global) = $src2.new",
@@ -2698,7 +1831,6 @@ def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memb(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memb(##$global) = $src2.new",
@@ -2706,7 +1838,6 @@ def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memb(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memb(##$global) = $src2.new",
@@ -2714,7 +1845,6 @@ def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv) memh(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memh(##$global) = $src2.new",
@@ -2722,7 +1852,6 @@ def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memh(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memh(##$global) = $src2.new",
@@ -2730,7 +1859,6 @@ def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memh(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memh(##$global) = $src2.new",
@@ -2738,7 +1866,6 @@ def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memh(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memh(##$global) = $src2.new",
@@ -2746,7 +1873,6 @@ def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv) memw(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1) memw(##$global) = $src2.new",
@@ -2754,7 +1880,6 @@ def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv) memw(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1) memw(##$global) = $src2.new",
@@ -2762,7 +1887,6 @@ def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (Pv.new) memw(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if ($src1.new) memw(##$global) = $src2.new",
@@ -2770,108 +1894,12 @@ def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
Requires<[HasV4T]>;
// if (!Pv.new) memw(##global) = Nt.new
-let mayStore = 1, neverHasSideEffects = 1 in
def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
"if (!$src1.new) memw(##$global) = $src2.new",
[]>,
Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memb(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memh(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if ($src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in
-def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
- IntRegs:$src2),
- "if (!$src1.new) memw(##$global+$offset) = $src2.new",
- []>,
- Requires<[HasV4T]>;
+}
//===----------------------------------------------------------------------===//
// NV/ST -
@@ -3061,31 +2089,37 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
// Add and accumulate.
// Rd=add(Rs,add(Ru,#s6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Ext:$src3),
"$dst = add($src1, add($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
- s6ImmPred:$src3)))]>,
+ s6_16ExtPred:$src3)))]>,
Requires<[HasV4T]>;
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
(i32 IntRegs:$src3))))]>,
Requires<[HasV4T]>;
// Generates the same instruction as ADDr_SUBri_V4 but matches a different
// pattern.
// Rd=add(Rs,sub(#s6,Ru))
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ (ins IntRegs:$src1, s6Ext:$src2, IntRegs:$src3),
"$dst = add($src1, sub(#$src2, $src3))",
[(set (i32 IntRegs:$dst),
- (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
(i32 IntRegs:$src3)))]>,
Requires<[HasV4T]>;
@@ -3099,6 +2133,7 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
// Logical doublewords.
// Rdd=and(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = and($src1, ~$src2)",
@@ -3107,6 +2142,7 @@ def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
Requires<[HasV4T]>;
// Rdd=or(Rtt,~Rss)
+let validSubTargets = HasV4SubT in
def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2),
"$dst = or($src1, ~$src2)",
@@ -3117,6 +2153,7 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
// Logical-logical doublewords.
// Rxx^=xor(Rss,Rtt)
+let validSubTargets = HasV4SubT in
def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
(ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -3129,17 +2166,20 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
// Logical-logical words.
// Rx=or(Ru,and(Rx,#s10))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT in
def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst = or($src1, and($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx[&|^]=and(Rs,Rt)
// Rx&=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, $src3)",
@@ -3150,6 +2190,7 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "reg" in
def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, $src3)",
@@ -3157,9 +2198,10 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=and(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, $src3)",
@@ -3171,6 +2213,7 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=and(Rs,~Rt)
// Rx&=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= and($src2, ~$src3)",
@@ -3181,6 +2224,7 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= and($src2, ~$src3)",
@@ -3191,6 +2235,7 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=and(Rs,~Rt)
+let validSubTargets = HasV4SubT in
def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= and($src2, ~$src3)",
@@ -3202,6 +2247,7 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=or(Rs,Rt)
// Rx&=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= or($src2, $src3)",
@@ -3212,6 +2258,7 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=or(Rs,Rt)
+let validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "reg" in
def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= or($src2, $src3)",
@@ -3219,9 +2266,10 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx^=or(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= or($src2, $src3)",
@@ -3233,6 +2281,7 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Rx[&|^]=xor(Rs,Rt)
// Rx&=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst &= xor($src2, $src3)",
@@ -3243,6 +2292,7 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst |= xor($src2, $src3)",
@@ -3253,6 +2303,7 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx^=xor(Rs,Rt)
+let validSubTargets = HasV4SubT in
def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
"$dst ^= xor($src2, $src3)",
@@ -3263,24 +2314,28 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rx|=and(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ANDr", InputType = "imm" in
def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= and($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Rx|=or(Rs,#s10)
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 10,
+validSubTargets = HasV4SubT, CextOpcode = "ORr_ORr", InputType = "imm" in
def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Ext:$src3),
"$dst |= or($src2, #$src3)",
[(set (i32 IntRegs:$dst),
(or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
- s10ImmPred:$src3)))],
+ s10ExtPred:$src3)))],
"$src1 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Modulo wrap
@@ -3327,25 +2382,41 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
// Multiply and use lower result.
// Rd=add(#u6,mpyi(Rs,#U6))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT in
def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, u6Imm:$src3),
"$dst = add(#$src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- u6ImmPred:$src1))]>,
+ u6ExtPred:$src1))]>,
Requires<[HasV4T]>;
-// Rd=add(#u6,mpyi(Rs,Rt))
+// Rd=add(##,mpyi(Rs,#U6))
+def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYri_V4 tglobaladdr:$src1, IntRegs:$src2,
+ u6ImmPred:$src3))>;
+// Rd=add(#u6,mpyi(Rs,Rt))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
- (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ (ins u6Ext:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add(#$src1, mpyi($src2, $src3))",
[(set (i32 IntRegs:$dst),
(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- u6ImmPred:$src1))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src1))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// Rd=add(##,mpyi(Rs,Rt))
+def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ (HexagonCONST32 tglobaladdr:$src1)),
+ (i32 (ADDi_MPYrr_V4 tglobaladdr:$src1, IntRegs:$src2,
+ IntRegs:$src3))>;
// Rd=add(Ru,mpyi(#u6:2,Rs))
+let validSubTargets = HasV4SubT in
def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
(ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi(#$src2, $src3))",
@@ -3355,15 +2426,18 @@ def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
Requires<[HasV4T]>;
// Rd=add(Ru,mpyi(Rs,#u6))
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 0, opExtentBits = 6,
+validSubTargets = HasV4SubT, InputType = "imm", CextOpcode = "ADD_MPY" in
def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Ext:$src3),
"$dst = add($src1, mpyi($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
- u6ImmPred:$src3)))]>,
- Requires<[HasV4T]>;
+ u6ExtPred:$src3)))]>,
+ Requires<[HasV4T]>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
+let validSubTargets = HasV4SubT, InputType = "reg", CextOpcode = "ADD_MPY" in
def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst = add($src1, mpyi($src2, $src3))",
@@ -3371,7 +2445,7 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
(i32 IntRegs:$src3))))],
"$src2 = $dst">,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Polynomial multiply words
@@ -3414,92 +2488,107 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
// Shift by immediate and accumulate.
// Rx=add(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=add(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = add(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=sub(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = sub(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by immediate and logical.
// Rx=and(#u8,asl(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=and(#u8,lsr(Rx,#U5))
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+validSubTargets = HasV4SubT in
def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = and(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,asl(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, asl($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Rx=or(#u8,lsr(Rx,#U5))
-let AddedComplexity = 30 in
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
+AddedComplexity = 30, validSubTargets = HasV4SubT in
def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
- (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ (ins u8Ext:$src1, IntRegs:$src2, u5Imm:$src3),
"$dst = or(#$src1, lsr($src2, #$src3))",
[(set (i32 IntRegs:$dst),
(or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
- u8ImmPred:$src1))],
+ u8ExtPred:$src1))],
"$src2 = $dst">,
Requires<[HasV4T]>;
// Shift by register.
// Rd=lsl(#s6,Rt)
+let validSubTargets = HasV4SubT in {
def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
"$dst = lsl(#$src1, $src2)",
[(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
@@ -3547,7 +2636,7 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
(i32 IntRegs:$src3))))],
"$src1 = $dst">,
Requires<[HasV4T]>;
-
+}
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT -
@@ -3981,7 +3070,61 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
// incorrect code for negative numbers.
// Pd=cmpb.eq(Rs,#u8)
-let isCompare = 1 in
+// p=!cmp.eq(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.eq(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotEQ_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setne (i32 IntRegs:$src1), s10ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gt(r1,#s10)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGT_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s10Ext:$src2),
+ "$dst = !cmp.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setgt (i32 IntRegs:$src1), s10ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,r2)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_rr : ALU32_rr<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !cmp.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// p=!cmp.gtu(r1,#u9)
+let isCompare = 1, validSubTargets = HasV4SubT in
+def CMPnotGTU_ri : ALU32_ri<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u9Ext:$src2),
+ "$dst = !cmp.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (not (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
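// The !cmp.* definitions above let instruction selection pick the V4 negated
// compares directly instead of emitting a compare followed by a predicate
// negation. For example, (setne r1, r2) should now select to a single
//   p0 = !cmp.eq(r1, r2)
// rather than p0 = cmp.eq(r1, r2) followed by p0 = not(p0).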
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, u8Imm:$src2),
"$dst = cmpb.eq($src1, #$src2)",
@@ -3989,8 +3132,14 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
(seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
Requires<[HasV4T]>;
+def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ bb:$offset)>,
+ Requires<[HasV4T]>;
+
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -4000,7 +3149,7 @@ def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.eq($src1, $src2)",
@@ -4010,7 +3159,7 @@ def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gt($src1, $src2)",
@@ -4020,29 +3169,237 @@ def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
Requires<[HasV4T]>;
// Pd=cmpb.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU", InputType = "imm" in
def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmpb.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+// SDNodeXForm for converting immediate C to C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformU7ToU7M1Imm(imm);
+}]>;
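// XformU7ToU7M1Imm is assumed to be a helper in HexagonISelDAGToDAG.cpp; a
// minimal sketch of what it presumably does (exact return type may differ):
//   inline SDValue XformU7ToU7M1Imm(signed Imm) {
//     // Decrement the immediate by one, e.g. 10 becomes 9 for the isdigit
//     // pattern below.
//     return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
//   }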
+
+// For the sequence
+// zext( seteq ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.eq(Rs, #u8)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( seteq (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setne (Rs, and(Rt, 255)))
+// Generate
+// Pd=cmpb.eq(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
+ (i32 (and (i32 IntRegs:$Rs), 255)))))),
+ (i32 (TFR_condset_ii (i1 (CMPbEQrr_ubub_V4 (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 255), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( and(Rs, 254), u8))
+// Generate
+// Pd=cmpb.gtu(Rs, #u8)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
+ u8ExtPred:$u8)))),
+ (i32 (TFR_condset_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$Rs),
+ (u8ExtPred:$u8))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setlt ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setugt ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 1, 0))>,
+ Requires<[HasV4T]>;
+
+// This pattern interferes with CoreMark performance, so it is not implemented
+// at this time.
+// For the sequence
+// zext( setgt ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#1
+// if (!Pd.new) Rd=#0
+
+// For the sequence
+// zext( setuge ( Rs, Rt))
+// Generate
+// Pd=cmp.ltu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setge ( Rs, Rt))
+// Generate
+// Pd=cmp.lt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
+def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rt),
+ (i32 IntRegs:$Rs))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setule ( Rs, Rt))
+// Generate
+// Pd=cmp.gtu(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTUrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setle ( Rs, Rt))
+// Generate
+// Pd=cmp.gt(Rs, Rt)
+// if (Pd.new) Rd=#0
+// if (!Pd.new) Rd=#1
+def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
+ (i32 (TFR_condset_ii (i1 (CMPGTrr (i32 IntRegs:$Rs),
+ (i32 IntRegs:$Rt))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// Use the isdigit transformation below
+
+// Generate code of the form 'mux_ii(cmpbgtu(Rdd, C-1), 0, 1)'
+// for C code of the form r = ((c >= '0') & (c <= '9')) ? 1 : 0;
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+//    after biasing the expression by 48. We depend on the representation
+//    and semantics of unsigned types.
+// 2) The front end has already converted <= 9 into < 10 on entry to LLVM.
+//
+// For the C code:
+//   retval = ((c >= '0') & (c <= '9')) ? 1 : 0;
+// the code is transformed upstream of LLVM into
+//   retval = (c - 48) < 10 ? 1 : 0;
+let AddedComplexity = 139 in
+def : Pat <(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
+ u7StrictPosImmPred:$src2)))),
+ (i32 (MUX_ii (i1 (CMPbGTUri_V4 (i32 IntRegs:$src1),
+ (DEC_CONST_BYTE u7StrictPosImmPred:$src2))),
+ 0, 1))>,
+ Requires<[HasV4T]>;
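+
+// A minimal C sketch (illustration only, not part of the pattern above;
+// 'is_digit' is a hypothetical helper name) showing why the single unsigned
+// compare is equivalent to the two-sided range check:
+//
+//   #include <assert.h>
+//
+//   static int is_digit(unsigned char c) {
+//     int two_tests = (c >= '0') & (c <= '9');   // original source form
+//     int one_test  = (unsigned)(c - 48) < 10u;  // form reaching the selector
+//     assert(two_tests == one_test);  // equal for every c: values below '0'
+//                                     // wrap to large unsigned numbers
+//     return one_test;
+//   }
+//
+// This biased form is the shape the pattern above (mux_ii of cmpb.gtu against
+// C-1, with C = 10 for isdigit) is written to match.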
// Pd=cmpb.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPbGTU",
+InputType = "reg" in
def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmpb.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
(and (i32 IntRegs:$src2), 255)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// The following instruction is not being extended because it results in
// incorrect code for negative numbers.
// Signed half compare(.eq) ri.
// Pd=cmph.eq(Rs,#s8)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, s8Imm:$src2),
"$dst = cmph.eq($src1, #$src2)",
@@ -4056,7 +3413,7 @@ def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
// r0=and(r0,#0xffff)
// p0=cmp.eq(r0,#0)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -4071,7 +3428,7 @@ def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
// r1=asl(r1,16)
// p0=cmp.eq(r0,r1)
// Pd=cmph.eq(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.eq($src1, $src2)",
@@ -4085,19 +3442,20 @@ used in the cmph.gt instruction.
// Signed half compare(.gt) ri.
// Pd=cmph.gt(Rs,#s8)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, s8Imm:$src2),
+ (ins IntRegs:$src1, s8Ext:$src2),
"$dst = cmph.gt($src1, #$src2)",
[(set (i1 PredRegs:$dst),
(setgt (shl (i32 IntRegs:$src1), (i32 16)),
- s8ImmPred:$src2))]>,
+ s8ExtPred:$src2))]>,
Requires<[HasV4T]>;
*/
// Signed half compare(.gt) rr.
// Pd=cmph.gt(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT in
def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gt($src1, $src2)",
@@ -4108,24 +3466,41 @@ def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
// Unsigned half compare rr (.gtu).
// Pd=cmph.gtu(Rs,Rt)
-let isCompare = 1 in
+let isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "reg" in
def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = cmph.gtu($src1, $src2)",
[(set (i1 PredRegs:$dst),
(setugt (and (i32 IntRegs:$src1), 65535),
(and (i32 IntRegs:$src2), 65535)))]>,
- Requires<[HasV4T]>;
+ Requires<[HasV4T]>, ImmRegRel;
// Unsigned half compare ri (.gtu).
// Pd=cmph.gtu(Rs,#u7)
-let isCompare = 1 in
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 7,
+isCompare = 1, validSubTargets = HasV4SubT, CextOpcode = "CMPhGTU",
+InputType = "imm" in
def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
- (ins IntRegs:$src1, u7Imm:$src2),
+ (ins IntRegs:$src1, u7Ext:$src2),
"$dst = cmph.gtu($src1, #$src2)",
[(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
- u7ImmPred:$src2))]>,
- Requires<[HasV4T]>;
+ u7ExtPred:$src2))]>,
+ Requires<[HasV4T]>, ImmRegRel;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
+
+let validSubTargets = HasV4SubT in
+def NTSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = !tstbit($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (shl 1, u5ImmPred:$src2), (i32 IntRegs:$src1)), 0))]>,
+ Requires<[HasV4T]>;
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -4237,227 +3612,156 @@ let isReturn = 1, isTerminator = 1,
Requires<[HasV4T]>;
}
-
// Load/Store with absolute addressing mode
// memw(#u6)=Rt
-multiclass ST_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src"),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_V4 : STInst2<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2")),
+let isNVStorable = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_V4 : STInst2<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src",
[]>,
Requires<[HasV4T]>;
- def _abs_nv_V4 : STInst2<(outs),
- (ins globaladdress:$absaddr, IntRegs:$src),
- !strconcat(OpcStr, "(##$absaddr) = $src.new"),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
+}
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#mnemonic#"(##$absaddr) = $src2.new",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+let mayStore = 1, isNVStore = 1, isExtended = 1, neverHasSideEffects = 1 in
+multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 0, isPredicable = 1 in
+ def NAME#_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdressExt:$absaddr, RC:$src),
+ mnemonic#"(##$absaddr) = $src.new",
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 1, isPredicated = 1 in {
+ defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>;
+ defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>;
+ }
+ }
}
-let AddedComplexity = 30, isPredicable = 1 in
-def STrid_abs_V4 : STInst<(outs),
- (ins globaladdress:$absaddr, DoubleRegs:$src),
- "memd(##$absaddr) = $src",
- [(store (i64 DoubleRegs:$src),
- (HexagonCONST32 tglobaladdr:$absaddr))]>,
- Requires<[HasV4T]>;
+let addrMode = Absolute in {
+ defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
+ ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
+ ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
+ defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
+ ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if ($src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def STrid_abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
- "if (!$src1.new) memd(##$absaddr) = $src2",
- []>,
- Requires<[HasV4T]>;
-
-defm STrib : ST_abs<"memb">;
-defm STrih : ST_abs<"memh">;
-defm STriw : ST_abs<"memw">;
+ let isNVStorable = 0 in
+ defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
-let Predicates = [HasV4T], AddedComplexity = 30 in
+let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(truncstorei16 (i32 IntRegs:$src1),
(HexagonCONST32 tglobaladdr:$absaddr)),
(STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
(STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>;
+def : Pat<(store (i64 DoubleRegs:$src1),
+ (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>;
+}
-multiclass LD_abs<string OpcStr> {
- let isPredicable = 1 in
- def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins globaladdress:$absaddr),
- !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
-
- let isPredicated = 1 in
- def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
+multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let PNewValue = !if(isPredNew, "new", "") in
+ def NAME : LDInst2<(outs RC:$dst),
+ (ins PredRegs:$src1, globaladdressExt:$absaddr),
+ !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"(##$absaddr)",
[]>,
Requires<[HasV4T]>;
+}
- let isPredicated = 1 in
- def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let PredSense = !if(PredNot, "false", "true") in {
+ defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
+ // Predicate new
+ defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
+ }
+}
- let isPredicated = 1 in
- def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
+let isExtended = 1, neverHasSideEffects = 1 in
+multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
+ let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
+ let opExtendable = 1, isPredicable = 1 in
+ def NAME#_V4 : LDInst2<(outs RC:$dst),
+ (ins globaladdressExt:$absaddr),
+ "$dst = "#mnemonic#"(##$absaddr)",
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- !strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(##$absaddr)")),
- []>,
- Requires<[HasV4T]>;
+ let opExtendable = 2, isPredicated = 1 in {
+ defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>;
+ defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>;
+ }
+ }
}
-let AddedComplexity = 30 in
-def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins globaladdress:$absaddr),
- "$dst = memd(##$absaddr)",
- [(set (i64 DoubleRegs:$dst),
- (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if ($src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 30, isPredicated = 1 in
-def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, globaladdress:$absaddr),
- "if (!$src1.new) $dst = memd(##$absaddr)",
- []>,
- Requires<[HasV4T]>;
-
-defm LDrib : LD_abs<"memb">;
-defm LDriub : LD_abs<"memub">;
-defm LDrih : LD_abs<"memh">;
-defm LDriuh : LD_abs<"memuh">;
-defm LDriw : LD_abs<"memw">;
-
+let addrMode = Absolute in {
+ defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
+ defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+ defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
+ defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+ defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
let Predicates = [HasV4T], AddedComplexity = 30 in
def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
@@ -4577,172 +3881,167 @@ defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
defm STriw_ind : ST_indirect_lo<"memw", store>;
// Store - absolute addressing mode: These instructions take a constant
-// value as the extended operand
+// value as the extended operand.
multiclass ST_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 0, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2"),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 1, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
- !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3")),
+ !strconcat(OpcStr, "(##$src2) = $src3")),
[]>,
Requires<[HasV4T]>;
+}
- def _abs_nv_V4 : STInst2<(outs),
- (ins u6Imm:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(#$src1) = $src2.new"),
+let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
+validSubTargets = HasV4SubT in
+ def _abs_nv_V4 : NVInst_V4<(outs),
+ (ins u0AlwaysExt:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(##$src1) = $src2.new"),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
+isNVStore = 1, validSubTargets = HasV4SubT in {
+ def _abs_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
- def _abs_cdnNotPt_nv_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
!strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ !strconcat(OpcStr, "(##$src2) = $src3.new")),
[]>,
Requires<[HasV4T]>;
}
+}
defm STrib_imm : ST_absimm<"memb">;
defm STrih_imm : ST_absimm<"memh">;
defm STriw_imm : ST_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
-
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
- (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>;
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
+ (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+}
// Load - absolute addressing mode: These instructions take a constant
// value as the extended operand
multiclass LD_absimm<string OpcStr> {
- let isPredicable = 1 in
+let isExtended = 1, opExtendable = 1, isPredicable = 1,
+validSubTargets = HasV4SubT in
def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins u6Imm:$src),
+ (ins u0AlwaysExt:$src),
!strconcat("$dst = ",
- !strconcat(OpcStr, "(#$src)")),
+ !strconcat(OpcStr, "(##$src)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
+let isExtended = 1, opExtendable = 2, isPredicated = 1,
+validSubTargets = HasV4SubT in {
def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
- let isPredicated = 1 in
def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u6Imm:$src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$src2),
!strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(#$src2)")),
+ !strconcat(OpcStr, "(##$src2)")),
[]>,
Requires<[HasV4T]>;
}
+}
-defm LDrib_imm : LD_absimm<"memb">;
+defm LDrib_imm : LD_absimm<"memb">;
defm LDriub_imm : LD_absimm<"memub">;
-defm LDrih_imm : LD_absimm<"memh">;
+defm LDrih_imm : LD_absimm<"memh">;
defm LDriuh_imm : LD_absimm<"memuh">;
-defm LDriw_imm : LD_absimm<"memw">;
+defm LDriw_imm : LD_absimm<"memw">;
-let Predicates = [HasV4T], AddedComplexity = 30 in
-def : Pat<(i32 (load u6ImmPred:$src)),
- (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+let Predicates = [HasV4T], AddedComplexity = 30 in {
+def : Pat<(i32 (load u0AlwaysExtPred:$src)),
+ (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
- (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
+ (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
- (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
+ (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
- (LDrih_imm_abs_V4 u6ImmPred:$src)>;
-
-let Predicates = [HasV4T], AddedComplexity=30 in
-def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
- (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
+ (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
+ (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+}
// Indexed store double word - global address.
// memw(Rs+#u6:2)=#S8
@@ -4764,3 +4063,109 @@ def STrih_offset_ext_V4 : STInst<(outs),
[(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
(add IntRegs:$src1, u6_1ImmPred:$src2))]>,
Requires<[HasV4T]>;
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1),
+ FoldGlobalAddrGP:$addr),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 FoldGlobalAddrGP:$addr,
+ (i64 DoubleRegs:$src1)),
+ (STrid_abs_V4 FoldGlobalAddrGP:$addr, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
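+
+// Rough illustration with assumed names (not from this patch): for
+//   long long g[4];  ...  g[2] = x;
+// the globaladdress+offset computation folds directly into the absolute
+// store, so the selected code can be a single extended instruction such as
+//   memd(##g+16) = r1:0
+// rather than materializing the address in a register first.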
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrib_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STrih_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), FoldGlobalAddrGP:$addr),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1)),
+ (STriw_abs_V4 FoldGlobalAddrGP:$addr, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i64 (load FoldGlobalAddrGP:$addr)),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_64 FoldGlobalAddrGP:$addr),
+ (i64 (LDrid_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrib_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (extloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (sextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDrih_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi16 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_16 FoldGlobalAddrGP:$addr),
+ (i32 (LDriuh_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (zextloadi8 FoldGlobalAddrGP:$addr)),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_8 FoldGlobalAddrGP:$addr),
+ (i32 (LDriub_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
+ (i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
+ Requires<[HasV4T]>;
+
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index db36ac0..f011d51 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,6 +15,7 @@
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCExpr.h"
@@ -38,9 +39,10 @@ static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
+void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
HexagonAsmPrinter& AP) {
MCI.setOpcode(MI->getOpcode());
+ MCI.setDesc(MI->getDesc());
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index aef6830..ced17b3 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -152,6 +152,12 @@ void VLIWMachineScheduler::schedule() {
// Postprocess the DAG to add platform specific artificial dependencies.
postprocessDAG();
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
// To view Height/Depth correctly, they should be accessed at least once.
DEBUG(unsigned maxH = 0;
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
@@ -166,7 +172,7 @@ void VLIWMachineScheduler::schedule() {
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
- initQueues();
+ initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -186,6 +192,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
DAG = static_cast<VLIWMachineScheduler*>(dag);
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
+
Top.init(DAG, SchedModel);
Bot.init(DAG, SchedModel);
@@ -193,6 +200,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
// are disabled, then these HazardRecs will be disabled.
const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -677,4 +686,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Bot.bumpNode(SU);
}
}
-
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d1882de..f947dfc 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -117,37 +117,15 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
"architecture version");
}
-void HexagonRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
-
- if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
- // Hexagon_TODO: add code
- } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
- // Hexagon_TODO: add code
- } else {
- llvm_unreachable("Cannot handle this call frame pseudo instruction");
- }
- MBB.erase(I);
-}
-
void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
-
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
//
// Hexagon_TODO: Do we need to enforce this for Hexagon?
assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Addressable stack objects are accessed using neg. offsets from %fp.
MachineFunction &MF = *MI.getParent()->getParent();
@@ -167,8 +145,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
!TII.isSpillPredRegOp(&MI)) {
// Replace frame index with a stack pointer reference.
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), false,
+ false, true);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(FrameSize+Offset);
} else {
// Replace frame index with a frame pointer reference.
if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
@@ -205,8 +184,8 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
dstReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if ((MI.getOpcode() == Hexagon::STriw_indexed) ||
(MI.getOpcode() == Hexagon::STriw) ||
(MI.getOpcode() == Hexagon::STrid) ||
@@ -233,29 +212,31 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_ri),
resReg).addReg(FrameReg).addImm(Offset);
}
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else if (TII.isMemOp(&MI)) {
unsigned resReg = HEXAGON_RESERVED_REG_1;
if (!MFI.hasVarSizedObjects() &&
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
- MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
- true);
- MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(),
+ false, false, true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset);
} else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::ADD_rr),
resReg).addReg(FrameReg).addReg(resReg);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
} else {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(Hexagon::ADD_ri),
resReg).addReg(FrameReg).addImm(Offset);
- MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,
+ true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
}
} else {
unsigned dstReg = MI.getOperand(0).getReg();
@@ -265,14 +246,14 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
TII.get(Hexagon::ADD_rr),
dstReg).addReg(FrameReg).addReg(dstReg);
// Can we delete MI??? r2 = add (r2, #0).
- MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
- MI.getOperand(i+1).ChangeToImmediate(0);
+ MI.getOperand(FIOperandNum).ChangeToRegister(dstReg, false, false,true);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(0);
}
} else {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
}
}
@@ -310,58 +291,6 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
Moves.push_back(MachineMove(0, Dst, Src));
}
-// Get the weight in units of pressure for this register class.
-const RegClassWeight &
-HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
- // Each TargetRegisterClass has a per register weight, and weight
- // limit which must be less than the limits of its pressure sets.
- static const RegClassWeight RCWeightTable[] = {
- {1, 32}, // IntRegs
- {1, 8}, // CRRegs
- {1, 4}, // PredRegs
- {2, 16}, // DoubleRegs
- {0, 0} };
- return RCWeightTable[RC->getID()];
-}
-
-/// Get the number of dimensions of register pressure.
-unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
- return 4;
-}
-
-/// Get the name of this register unit pressure set.
-const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
- static const char *const RegPressureSetName[] = {
- "IntRegsRegSet",
- "CRRegsRegSet",
- "PredRegsRegSet",
- "DoubleRegsRegSet"
- };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureSetName[Idx];
-}
-
-/// Get the register unit pressure limit for this dimension.
-/// This limit must be adjusted dynamically for reserved registers.
-unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
- static const int RegPressureLimit [] = { 16, 4, 2, 8 };
- assert((Idx < 4) && "Index out of bounds");
- return RegPressureLimit[Idx];
-}
-
-const int*
-HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
- const {
- static const int RCSetsTable[] = {
- 0, -1, // IntRegs
- 1, -1, // CRRegs
- 2, -1, // PredRegs
- 0, -1, // DoubleRegs
- -1 };
- static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
- unsigned SetListStart = RCSetStartTable[RC->getID()];
- return &RCSetsTable[SetListStart];
-}
unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index e8f3cfb..8a3f94a 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -56,12 +56,9 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
@@ -87,11 +84,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
// Exception handling queries.
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
- unsigned getNumRegPressureSets() const;
- const char *getRegPressureSetName(unsigned Idx) const;
- unsigned getRegPressureSetLimit(unsigned Idx) const;
- const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index b5ff69a..c2cfbb9 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -8,10 +8,11 @@
//===----------------------------------------------------------------------===//
// Functional Units
-def LUNIT : FuncUnit;
-def LSUNIT : FuncUnit;
-def MUNIT : FuncUnit;
-def SUNIT : FuncUnit;
+def LSUNIT : FuncUnit; // SLOT0
+def LUNIT : FuncUnit; // SLOT1
+def MUNIT : FuncUnit; // SLOT2
+def SUNIT : FuncUnit; // SLOT3
+def LOOPUNIT : FuncUnit;
// Itinerary classes
def ALU32 : InstrItinClass;
@@ -20,27 +21,34 @@ def CR : InstrItinClass;
def J : InstrItinClass;
def JR : InstrItinClass;
def LD : InstrItinClass;
+def LD0 : InstrItinClass;
def M : InstrItinClass;
def ST : InstrItinClass;
+def ST0 : InstrItinClass;
def S : InstrItinClass;
def SYS : InstrItinClass;
-def MARKER : InstrItinClass;
+def ENDLOOP : InstrItinClass;
def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
def HexagonItineraries :
- ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [
InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>,
InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>,
+ InstrStage<1, [MUNIT, SUNIT]>]>
]>;
def HexagonModel : SchedMachineModel {
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 5668ae8..ef72cf4 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -28,6 +28,10 @@ def SLOT0 : FuncUnit;
def SLOT1 : FuncUnit;
def SLOT2 : FuncUnit;
def SLOT3 : FuncUnit;
+// Endloop is a pseudo instruction that is encoded with 2 bits in a packet
+// rather than taking an execution slot. This special unit is needed
+// to schedule an ENDLOOP with 4 other instructions.
+def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
def NV_V4 : InstrItinClass;
@@ -36,22 +40,26 @@ def MEM_V4 : InstrItinClass;
def PREFIX : InstrItinClass;
def HexagonItinerariesV4 :
- ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
]>;
def HexagonModelV4 : SchedMachineModel {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 287b3d6..d9fef3e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -122,7 +122,7 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
- addPass(createHexagonISelDag(getHexagonTargetMachine()));
+ addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
addPass(createHexagonPeephole());
return false;
}
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 409a243..aff6b86 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -376,7 +376,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STrib_indexed:
case Hexagon::STrib_indexed_shl_V4:
case Hexagon::STrib_shl_V4:
- case Hexagon::STrib_GP_V4:
case Hexagon::STb_GP_V4:
case Hexagon::POST_STbri:
case Hexagon::STrib_cPt:
@@ -399,17 +398,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STb_GP_cNotPt_V4:
case Hexagon::STb_GP_cdnPt_V4:
case Hexagon::STb_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cPt_V4:
- case Hexagon::STrib_GP_cNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
// store halfword
case Hexagon::STrih:
case Hexagon::STrih_indexed:
case Hexagon::STrih_indexed_shl_V4:
case Hexagon::STrih_shl_V4:
- case Hexagon::STrih_GP_V4:
case Hexagon::STh_GP_V4:
case Hexagon::POST_SThri:
case Hexagon::STrih_cPt:
@@ -432,17 +426,12 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STh_GP_cNotPt_V4:
case Hexagon::STh_GP_cdnPt_V4:
case Hexagon::STh_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cPt_V4:
- case Hexagon::STrih_GP_cNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
// store word
case Hexagon::STriw:
case Hexagon::STriw_indexed:
case Hexagon::STriw_indexed_shl_V4:
case Hexagon::STriw_shl_V4:
- case Hexagon::STriw_GP_V4:
case Hexagon::STw_GP_V4:
case Hexagon::POST_STwri:
case Hexagon::STriw_cPt:
@@ -465,10 +454,6 @@ bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
case Hexagon::STw_GP_cNotPt_V4:
case Hexagon::STw_GP_cdnPt_V4:
case Hexagon::STw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cPt_V4:
- case Hexagon::STriw_GP_cNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
return QRI->Subtarget.hasV4TOps();
}
return false;
@@ -508,9 +493,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrib_shl_V4:
return Hexagon::STrib_shl_nv_V4;
- case Hexagon::STrib_GP_V4:
- return Hexagon::STrib_GP_nv_V4;
-
case Hexagon::STb_GP_V4:
return Hexagon::STb_GP_nv_V4;
@@ -577,18 +559,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STb_GP_cdnNotPt_V4:
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_V4:
- return Hexagon::STrib_GP_cPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4:
- return Hexagon::STrib_GP_cNotPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnPt_V4:
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_V4:
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// store new value halfword
case Hexagon::STrih:
return Hexagon::STrih_nv_V4;
@@ -602,9 +572,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STrih_shl_V4:
return Hexagon::STrih_shl_nv_V4;
- case Hexagon::STrih_GP_V4:
- return Hexagon::STrih_GP_nv_V4;
-
case Hexagon::STh_GP_V4:
return Hexagon::STh_GP_nv_V4;
@@ -671,18 +638,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STh_GP_cdnNotPt_V4:
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_V4:
- return Hexagon::STrih_GP_cPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4:
- return Hexagon::STrih_GP_cNotPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnPt_V4:
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_V4:
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// store new value word
case Hexagon::STriw:
return Hexagon::STriw_nv_V4;
@@ -696,9 +651,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STriw_shl_V4:
return Hexagon::STriw_shl_nv_V4;
- case Hexagon::STriw_GP_V4:
- return Hexagon::STriw_GP_nv_V4;
-
case Hexagon::STw_GP_V4:
return Hexagon::STw_GP_nv_V4;
@@ -765,17 +717,6 @@ static int GetDotNewOp(const int opc) {
case Hexagon::STw_GP_cdnNotPt_V4:
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_V4:
- return Hexagon::STriw_GP_cPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4:
- return Hexagon::STriw_GP_cNotPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnPt_V4:
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_V4:
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
}
}
@@ -821,12 +762,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_V4 :
return Hexagon::STb_GP_cdnNotPt_V4;
- case Hexagon::STrib_GP_cPt_V4 :
- return Hexagon::STrib_GP_cdnPt_V4;
-
- case Hexagon::STrib_GP_cNotPt_V4 :
- return Hexagon::STrib_GP_cdnNotPt_V4;
-
// Store doubleword conditionally
case Hexagon::STrid_cPt :
return Hexagon::STrid_cdnPt_V4;
@@ -858,12 +793,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STd_GP_cNotPt_V4 :
return Hexagon::STd_GP_cdnNotPt_V4;
- case Hexagon::STrid_GP_cPt_V4 :
- return Hexagon::STrid_GP_cdnPt_V4;
-
- case Hexagon::STrid_GP_cNotPt_V4 :
- return Hexagon::STrid_GP_cdnNotPt_V4;
-
// Store halfword conditionally
case Hexagon::STrih_cPt :
return Hexagon::STrih_cdnPt_V4;
@@ -901,12 +830,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_V4 :
return Hexagon::STh_GP_cdnNotPt_V4;
- case Hexagon::STrih_GP_cPt_V4 :
- return Hexagon::STrih_GP_cdnPt_V4;
-
- case Hexagon::STrih_GP_cNotPt_V4 :
- return Hexagon::STrih_GP_cdnNotPt_V4;
-
// Store word conditionally
case Hexagon::STriw_cPt :
return Hexagon::STriw_cdnPt_V4;
@@ -944,12 +867,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_V4 :
return Hexagon::STw_GP_cdnNotPt_V4;
- case Hexagon::STriw_GP_cPt_V4 :
- return Hexagon::STriw_GP_cdnPt_V4;
-
- case Hexagon::STriw_GP_cNotPt_V4 :
- return Hexagon::STriw_GP_cdnNotPt_V4;
-
// Condtional Jumps
case Hexagon::JMP_c:
return Hexagon::JMP_cdnPt;
@@ -1092,72 +1009,36 @@ static int GetDotNewPredOp(const int opc) {
// V4 indexed+scaled load
- case Hexagon::LDrid_indexed_cPt_V4 :
- return Hexagon::LDrid_indexed_cdnPt_V4;
-
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- return Hexagon::LDrid_indexed_cdnNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnPt_V4;
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrib_indexed_cPt_V4 :
- return Hexagon::LDrib_indexed_cdnPt_V4;
-
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- return Hexagon::LDrib_indexed_cdnNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnPt_V4;
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriub_indexed_cPt_V4 :
- return Hexagon::LDriub_indexed_cdnPt_V4;
-
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- return Hexagon::LDriub_indexed_cdnNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnPt_V4;
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDrih_indexed_cPt_V4 :
- return Hexagon::LDrih_indexed_cdnPt_V4;
-
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- return Hexagon::LDrih_indexed_cdnNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnPt_V4;
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriuh_indexed_cPt_V4 :
- return Hexagon::LDriuh_indexed_cdnPt_V4;
-
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- return Hexagon::LDriuh_indexed_cdnNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
- case Hexagon::LDriw_indexed_cPt_V4 :
- return Hexagon::LDriw_indexed_cdnPt_V4;
-
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- return Hexagon::LDriw_indexed_cdnNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cPt_V4 :
return Hexagon::LDriw_indexed_shl_cdnPt_V4;
@@ -1202,42 +1083,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::LDw_GP_cNotPt_V4:
return Hexagon::LDw_GP_cdnNotPt_V4;
- case Hexagon::LDrid_GP_cPt_V4:
- return Hexagon::LDrid_GP_cdnPt_V4;
-
- case Hexagon::LDrid_GP_cNotPt_V4:
- return Hexagon::LDrid_GP_cdnNotPt_V4;
-
- case Hexagon::LDrib_GP_cPt_V4:
- return Hexagon::LDrib_GP_cdnPt_V4;
-
- case Hexagon::LDrib_GP_cNotPt_V4:
- return Hexagon::LDrib_GP_cdnNotPt_V4;
-
- case Hexagon::LDriub_GP_cPt_V4:
- return Hexagon::LDriub_GP_cdnPt_V4;
-
- case Hexagon::LDriub_GP_cNotPt_V4:
- return Hexagon::LDriub_GP_cdnNotPt_V4;
-
- case Hexagon::LDrih_GP_cPt_V4:
- return Hexagon::LDrih_GP_cdnPt_V4;
-
- case Hexagon::LDrih_GP_cNotPt_V4:
- return Hexagon::LDrih_GP_cdnNotPt_V4;
-
- case Hexagon::LDriuh_GP_cPt_V4:
- return Hexagon::LDriuh_GP_cdnPt_V4;
-
- case Hexagon::LDriuh_GP_cNotPt_V4:
- return Hexagon::LDriuh_GP_cdnNotPt_V4;
-
- case Hexagon::LDriw_GP_cPt_V4:
- return Hexagon::LDriw_GP_cdnPt_V4;
-
- case Hexagon::LDriw_GP_cNotPt_V4:
- return Hexagon::LDriw_GP_cdnNotPt_V4;
-
// Conditional store new-value byte
case Hexagon::STrib_cPt_nv_V4 :
return Hexagon::STrib_cdnPt_nv_V4;
@@ -1265,12 +1110,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4 :
return Hexagon::STb_GP_cdnNotPt_nv_V4;
- case Hexagon::STrib_GP_cPt_nv_V4 :
- return Hexagon::STrib_GP_cdnPt_nv_V4;
-
- case Hexagon::STrib_GP_cNotPt_nv_V4 :
- return Hexagon::STrib_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value halfword
case Hexagon::STrih_cPt_nv_V4 :
return Hexagon::STrih_cdnPt_nv_V4;
@@ -1298,12 +1137,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4 :
return Hexagon::STh_GP_cdnNotPt_nv_V4;
- case Hexagon::STrih_GP_cPt_nv_V4 :
- return Hexagon::STrih_GP_cdnPt_nv_V4;
-
- case Hexagon::STrih_GP_cNotPt_nv_V4 :
- return Hexagon::STrih_GP_cdnNotPt_nv_V4;
-
// Conditional store new-value word
case Hexagon::STriw_cPt_nv_V4 :
return Hexagon::STriw_cdnPt_nv_V4;
@@ -1331,12 +1164,6 @@ static int GetDotNewPredOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4 :
return Hexagon::STw_GP_cdnNotPt_nv_V4;
- case Hexagon::STriw_GP_cPt_nv_V4 :
- return Hexagon::STriw_GP_cdnPt_nv_V4;
-
- case Hexagon::STriw_GP_cNotPt_nv_V4 :
- return Hexagon::STriw_GP_cdnNotPt_nv_V4;
-
// Conditional add
case Hexagon::ADD_ri_cPt :
return Hexagon::ADD_ri_cdnPt;
@@ -1623,72 +1450,36 @@ static int GetDotOldOp(const int opc) {
// V4 indexed+scaled Load
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- return Hexagon::LDrid_indexed_cPt_V4;
-
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
- return Hexagon::LDrid_indexed_cNotPt_V4;
-
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
return Hexagon::LDrid_indexed_shl_cPt_V4;
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrid_indexed_shl_cNotPt_V4;
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- return Hexagon::LDrib_indexed_cPt_V4;
-
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
- return Hexagon::LDrib_indexed_cNotPt_V4;
-
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
return Hexagon::LDrib_indexed_shl_cPt_V4;
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrib_indexed_shl_cNotPt_V4;
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- return Hexagon::LDriub_indexed_cPt_V4;
-
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
- return Hexagon::LDriub_indexed_cNotPt_V4;
-
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
return Hexagon::LDriub_indexed_shl_cPt_V4;
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriub_indexed_shl_cNotPt_V4;
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- return Hexagon::LDrih_indexed_cPt_V4;
-
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
- return Hexagon::LDrih_indexed_cNotPt_V4;
-
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
return Hexagon::LDrih_indexed_shl_cPt_V4;
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDrih_indexed_shl_cNotPt_V4;
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- return Hexagon::LDriuh_indexed_cPt_V4;
-
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
- return Hexagon::LDriuh_indexed_cNotPt_V4;
-
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
return Hexagon::LDriuh_indexed_shl_cPt_V4;
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- return Hexagon::LDriw_indexed_cPt_V4;
-
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
- return Hexagon::LDriw_indexed_cNotPt_V4;
-
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
return Hexagon::LDriw_indexed_shl_cPt_V4;
@@ -1733,42 +1524,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::LDw_GP_cdnNotPt_V4:
return Hexagon::LDw_GP_cNotPt_V4;
- case Hexagon::LDrid_GP_cdnPt_V4:
- return Hexagon::LDrid_GP_cPt_V4;
-
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- return Hexagon::LDrid_GP_cNotPt_V4;
-
- case Hexagon::LDrib_GP_cdnPt_V4:
- return Hexagon::LDrib_GP_cPt_V4;
-
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- return Hexagon::LDrib_GP_cNotPt_V4;
-
- case Hexagon::LDriub_GP_cdnPt_V4:
- return Hexagon::LDriub_GP_cPt_V4;
-
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- return Hexagon::LDriub_GP_cNotPt_V4;
-
- case Hexagon::LDrih_GP_cdnPt_V4:
- return Hexagon::LDrih_GP_cPt_V4;
-
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- return Hexagon::LDrih_GP_cNotPt_V4;
-
- case Hexagon::LDriuh_GP_cdnPt_V4:
- return Hexagon::LDriuh_GP_cPt_V4;
-
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- return Hexagon::LDriuh_GP_cNotPt_V4;
-
- case Hexagon::LDriw_GP_cdnPt_V4:
- return Hexagon::LDriw_GP_cPt_V4;
-
- case Hexagon::LDriw_GP_cdnNotPt_V4:
- return Hexagon::LDriw_GP_cNotPt_V4;
-
// Conditional add
case Hexagon::ADD_ri_cdnPt :
@@ -1902,16 +1657,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STb_GP_cNotPt_nv_V4:
return Hexagon::STb_GP_cNotPt_V4;
- case Hexagon::STrib_GP_cdnPt_nv_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cPt_nv_V4:
- return Hexagon::STrib_GP_cPt_V4;
-
- case Hexagon::STrib_GP_cdnNotPt_nv_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cNotPt_nv_V4:
- return Hexagon::STrib_GP_cNotPt_V4;
-
// Store new-value byte - unconditional
case Hexagon::STrib_nv_V4:
return Hexagon::STrib;
@@ -1925,9 +1670,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrib_shl_nv_V4:
return Hexagon::STrib_shl_V4;
- case Hexagon::STrib_GP_nv_V4:
- return Hexagon::STrib_GP_V4;
-
case Hexagon::STb_GP_nv_V4:
return Hexagon::STb_GP_V4;
@@ -1991,16 +1733,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STh_GP_cNotPt_nv_V4:
return Hexagon::STh_GP_cNotPt_V4;
- case Hexagon::STrih_GP_cdnPt_nv_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cPt_nv_V4:
- return Hexagon::STrih_GP_cPt_V4;
-
- case Hexagon::STrih_GP_cdnNotPt_nv_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cNotPt_nv_V4:
- return Hexagon::STrih_GP_cNotPt_V4;
-
// Store new-value halfword - unconditional
case Hexagon::STrih_nv_V4:
@@ -2015,9 +1747,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STrih_shl_nv_V4:
return Hexagon::STrih_shl_V4;
- case Hexagon::STrih_GP_nv_V4:
- return Hexagon::STrih_GP_V4;
-
case Hexagon::STh_GP_nv_V4:
return Hexagon::STh_GP_V4;
@@ -2082,16 +1811,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STw_GP_cNotPt_nv_V4:
return Hexagon::STw_GP_cNotPt_V4;
- case Hexagon::STriw_GP_cdnPt_nv_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cPt_nv_V4:
- return Hexagon::STriw_GP_cPt_V4;
-
- case Hexagon::STriw_GP_cdnNotPt_nv_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cNotPt_nv_V4:
- return Hexagon::STriw_GP_cNotPt_V4;
-
// Store new-value word - unconditional
case Hexagon::STriw_nv_V4:
@@ -2106,9 +1825,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STriw_shl_nv_V4:
return Hexagon::STriw_shl_V4;
- case Hexagon::STriw_GP_nv_V4:
- return Hexagon::STriw_GP_V4;
-
case Hexagon::STw_GP_nv_V4:
return Hexagon::STw_GP_V4;
@@ -2147,11 +1863,6 @@ static int GetDotOldOp(const int opc) {
case Hexagon::STd_GP_cdnNotPt_V4 :
return Hexagon::STd_GP_cNotPt_V4;
- case Hexagon::STrid_GP_cdnPt_V4 :
- return Hexagon::STrid_GP_cPt_V4;
-
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- return Hexagon::STrid_GP_cNotPt_V4;
}
}
@@ -2249,28 +1960,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnPt :
case Hexagon::POST_LDriub_cPt :
case Hexagon::POST_LDriub_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cPt_V4 :
- case Hexagon::LDrid_indexed_cdnPt_V4 :
case Hexagon::LDrid_indexed_shl_cPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
case Hexagon::LDrib_indexed_shl_cPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
case Hexagon::LDriub_indexed_shl_cPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
case Hexagon::LDrih_indexed_shl_cPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
case Hexagon::LDriuh_indexed_shl_cPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
case Hexagon::LDriw_indexed_shl_cPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
case Hexagon::ADD_ri_cPt :
@@ -2299,42 +1998,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTB_cdnPt_V4 :
case Hexagon::ZXTH_cPt_V4 :
case Hexagon::ZXTH_cdnPt_V4 :
- case Hexagon::LDrid_GP_cPt_V4 :
- case Hexagon::LDrib_GP_cPt_V4 :
- case Hexagon::LDriub_GP_cPt_V4 :
- case Hexagon::LDrih_GP_cPt_V4 :
- case Hexagon::LDriuh_GP_cPt_V4 :
- case Hexagon::LDriw_GP_cPt_V4 :
case Hexagon::LDd_GP_cPt_V4 :
case Hexagon::LDb_GP_cPt_V4 :
case Hexagon::LDub_GP_cPt_V4 :
case Hexagon::LDh_GP_cPt_V4 :
case Hexagon::LDuh_GP_cPt_V4 :
case Hexagon::LDw_GP_cPt_V4 :
- case Hexagon::STrid_GP_cPt_V4 :
- case Hexagon::STrib_GP_cPt_V4 :
- case Hexagon::STrih_GP_cPt_V4 :
- case Hexagon::STriw_GP_cPt_V4 :
case Hexagon::STd_GP_cPt_V4 :
case Hexagon::STb_GP_cPt_V4 :
case Hexagon::STh_GP_cPt_V4 :
case Hexagon::STw_GP_cPt_V4 :
- case Hexagon::LDrid_GP_cdnPt_V4 :
- case Hexagon::LDrib_GP_cdnPt_V4 :
- case Hexagon::LDriub_GP_cdnPt_V4 :
- case Hexagon::LDrih_GP_cdnPt_V4 :
- case Hexagon::LDriuh_GP_cdnPt_V4 :
- case Hexagon::LDriw_GP_cdnPt_V4 :
case Hexagon::LDd_GP_cdnPt_V4 :
case Hexagon::LDb_GP_cdnPt_V4 :
case Hexagon::LDub_GP_cdnPt_V4 :
case Hexagon::LDh_GP_cdnPt_V4 :
case Hexagon::LDuh_GP_cdnPt_V4 :
case Hexagon::LDw_GP_cdnPt_V4 :
- case Hexagon::STrid_GP_cdnPt_V4 :
- case Hexagon::STrib_GP_cdnPt_V4 :
- case Hexagon::STrih_GP_cdnPt_V4 :
- case Hexagon::STriw_GP_cdnPt_V4 :
case Hexagon::STd_GP_cdnPt_V4 :
case Hexagon::STb_GP_cdnPt_V4 :
case Hexagon::STh_GP_cdnPt_V4 :
@@ -2420,28 +2099,16 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::LDriub_indexed_cdnNotPt :
case Hexagon::POST_LDriub_cNotPt :
case Hexagon::POST_LDriub_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_cNotPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
case Hexagon::ADD_ri_cNotPt :
@@ -2471,42 +2138,22 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::ZXTH_cNotPt_V4 :
case Hexagon::ZXTH_cdnNotPt_V4 :
- case Hexagon::LDrid_GP_cNotPt_V4 :
- case Hexagon::LDrib_GP_cNotPt_V4 :
- case Hexagon::LDriub_GP_cNotPt_V4 :
- case Hexagon::LDrih_GP_cNotPt_V4 :
- case Hexagon::LDriuh_GP_cNotPt_V4 :
- case Hexagon::LDriw_GP_cNotPt_V4 :
case Hexagon::LDd_GP_cNotPt_V4 :
case Hexagon::LDb_GP_cNotPt_V4 :
case Hexagon::LDub_GP_cNotPt_V4 :
case Hexagon::LDh_GP_cNotPt_V4 :
case Hexagon::LDuh_GP_cNotPt_V4 :
case Hexagon::LDw_GP_cNotPt_V4 :
- case Hexagon::STrid_GP_cNotPt_V4 :
- case Hexagon::STrib_GP_cNotPt_V4 :
- case Hexagon::STrih_GP_cNotPt_V4 :
- case Hexagon::STriw_GP_cNotPt_V4 :
case Hexagon::STd_GP_cNotPt_V4 :
case Hexagon::STb_GP_cNotPt_V4 :
case Hexagon::STh_GP_cNotPt_V4 :
case Hexagon::STw_GP_cNotPt_V4 :
- case Hexagon::LDrid_GP_cdnNotPt_V4 :
- case Hexagon::LDrib_GP_cdnNotPt_V4 :
- case Hexagon::LDriub_GP_cdnNotPt_V4 :
- case Hexagon::LDrih_GP_cdnNotPt_V4 :
- case Hexagon::LDriuh_GP_cdnNotPt_V4 :
- case Hexagon::LDriw_GP_cdnNotPt_V4 :
case Hexagon::LDd_GP_cdnNotPt_V4 :
case Hexagon::LDb_GP_cdnNotPt_V4 :
case Hexagon::LDub_GP_cdnNotPt_V4 :
case Hexagon::LDh_GP_cdnNotPt_V4 :
case Hexagon::LDuh_GP_cdnNotPt_V4 :
case Hexagon::LDw_GP_cdnNotPt_V4 :
- case Hexagon::STrid_GP_cdnNotPt_V4 :
- case Hexagon::STrib_GP_cdnNotPt_V4 :
- case Hexagon::STrih_GP_cdnNotPt_V4 :
- case Hexagon::STriw_GP_cdnNotPt_V4 :
case Hexagon::STd_GP_cdnNotPt_V4 :
case Hexagon::STb_GP_cdnNotPt_V4 :
case Hexagon::STh_GP_cdnNotPt_V4 :
@@ -2563,28 +2210,16 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
case Hexagon::POST_LDriub_cdnPt_V4 :
case Hexagon::POST_LDriub_cdnNotPt_V4 :
- case Hexagon::LDrid_indexed_cdnPt_V4 :
- case Hexagon::LDrid_indexed_cdnNotPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrib_indexed_cdnPt_V4 :
- case Hexagon::LDrib_indexed_cdnNotPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriub_indexed_cdnPt_V4 :
- case Hexagon::LDriub_indexed_cdnNotPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDrih_indexed_cdnPt_V4 :
- case Hexagon::LDrih_indexed_cdnNotPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriuh_indexed_cdnPt_V4 :
- case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
- case Hexagon::LDriw_indexed_cdnPt_V4 :
- case Hexagon::LDriw_indexed_cdnNotPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
@@ -2680,27 +2315,7 @@ bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
case Hexagon::LDuh_GP_cdnNotPt_V4:
case Hexagon::LDw_GP_cdnPt_V4:
case Hexagon::LDw_GP_cdnNotPt_V4:
- case Hexagon::LDrid_GP_cdnPt_V4:
- case Hexagon::LDrid_GP_cdnNotPt_V4:
- case Hexagon::LDrib_GP_cdnPt_V4:
- case Hexagon::LDrib_GP_cdnNotPt_V4:
- case Hexagon::LDriub_GP_cdnPt_V4:
- case Hexagon::LDriub_GP_cdnNotPt_V4:
- case Hexagon::LDrih_GP_cdnPt_V4:
- case Hexagon::LDrih_GP_cdnNotPt_V4:
- case Hexagon::LDriuh_GP_cdnPt_V4:
- case Hexagon::LDriuh_GP_cdnNotPt_V4:
- case Hexagon::LDriw_GP_cdnPt_V4:
- case Hexagon::LDriw_GP_cdnNotPt_V4:
-
- case Hexagon::STrid_GP_cdnPt_V4:
- case Hexagon::STrid_GP_cdnNotPt_V4:
- case Hexagon::STrib_GP_cdnPt_V4:
- case Hexagon::STrib_GP_cdnNotPt_V4:
- case Hexagon::STrih_GP_cdnPt_V4:
- case Hexagon::STrih_GP_cdnNotPt_V4:
- case Hexagon::STriw_GP_cdnPt_V4:
- case Hexagon::STriw_GP_cdnNotPt_V4:
+
case Hexagon::STd_GP_cdnPt_V4:
case Hexagon::STd_GP_cdnNotPt_V4:
case Hexagon::STb_GP_cdnPt_V4:
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index c700354..36da6df 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "HexagonInstPrinter.h"
-#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
-#include "HexagonMCInst.h"
+#include "Hexagon.h"
+#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -28,6 +28,8 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
+const char HexagonInstPrinter::PacketPadding = '\t';
+
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
@@ -43,43 +45,42 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
StringRef Annot) {
- const char packetPadding[] = " ";
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
if (MI->getOpcode() == Hexagon::ENDLOOP0) {
    // Ending a hardware loop is different from ending a regular packet.
- assert(MI->isEndPacket() && "Loop end must also end the packet");
+ assert(MI->isPacketEnd() && "Loop-end must also end the packet");
- if (MI->isStartPacket()) {
+ if (MI->isPacketStart()) {
// There must be a packet to end a loop.
// FIXME: when shuffling is always run, this shouldn't be needed.
HexagonMCInst Nop;
StringRef NoAnnot;
Nop.setOpcode (Hexagon::NOP);
- Nop.setStartPacket (MI->isStartPacket());
+ Nop.setPacketStart (MI->isPacketStart());
printInst (&Nop, O, NoAnnot);
}
// Close the packet.
- if (MI->isEndPacket())
- O << packetPadding << endPacket;
+ if (MI->isPacketEnd())
+ O << PacketPadding << endPacket;
printInstruction(MI, O);
}
else {
// Prefix the insn opening the packet.
- if (MI->isStartPacket())
- O << packetPadding << startPacket << '\n';
+ if (MI->isPacketStart())
+ O << PacketPadding << startPacket << '\n';
printInstruction(MI, O);
// Suffix the insn closing the packet.
- if (MI->isEndPacket())
+ if (MI->isPacketEnd())
      // Always close the packet on a new line, since the GNU assembler has
      // issues with a closing brace on the same line as CONST{32,64}.
- O << '\n' << packetPadding << endPacket;
+ O << '\n' << PacketPadding << endPacket;
}
printAnnotation(O, Annot);
@@ -102,12 +103,23 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ O << MI->getOperand(OpNo).getImm();
+ } else {
+ llvm_unreachable("Unknown operand");
+ }
}
void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) const {
- O << MI->getOperand(OpNo).getImm();
+ const HexagonMCInst *HMCI = static_cast<const HexagonMCInst*>(MI);
+ if (HMCI->isConstExtended())
+ O << "#";
+ printOperand(MI, OpNo, O);
}
void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
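
For reference, the packet bracketing that printInst produces can be reproduced with a small standalone sketch; the instruction strings, the per-line padding inside the packet, and the loop below are illustrative stand-ins for printInstruction() and the streamer, not code from this patch.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const char PacketPadding = '\t';
  const char StartPacket = '{', EndPacket = '}';
  // A made-up two-instruction packet; real text comes from printInstruction().
  std::vector<std::string> Packet = {"r0 = add(r1, r2)", "memw(r3+#0) = r0.new"};

  for (std::size_t i = 0; i < Packet.size(); ++i) {
    if (i == 0)                                  // isPacketStart()
      std::cout << PacketPadding << StartPacket << '\n';
    std::cout << PacketPadding << Packet[i];     // stands in for printInstruction()
    if (i + 1 == Packet.size())                  // isPacketEnd()
      std::cout << '\n' << PacketPadding << EndPacket;
    std::cout << '\n';                           // the streamer's per-instruction newline
  }
  return 0;
}

Running it prints a tab-indented '{' on its own line, the two instructions, and a tab-indented '}' on a fresh line, which is the layout the comment above requires for CONST{32,64}.
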
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
index 902a323..d0cef68 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -14,16 +14,18 @@
#ifndef HEXAGONINSTPRINTER_H
#define HEXAGONINSTPRINTER_H
-#include "HexagonMCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
+ class HexagonMCInst;
+
class HexagonInstPrinter : public MCInstPrinter {
public:
explicit HexagonInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
@@ -65,10 +67,19 @@ namespace llvm {
void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
{ printSymbol(MI, OpNo, O, false); }
- bool isConstExtended(const MCInst *MI) const;
+ const MCInstrInfo &getMII() const {
+ return MII;
+ }
+
protected:
void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
const;
+
+ static const char PacketPadding;
+
+ private:
+ const MCInstrInfo &MII;
+
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
index 8678401..59849aa 100644
--- a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = HexagonAsmPrinter
parent = Hexagon
-required_libraries = MC Support
+required_libraries = HexagonDesc MC Support
add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
index 8e3da99..62b9b60 100644
--- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMHexagonDesc
- HexagonMCTargetDesc.cpp
HexagonMCAsmInfo.cpp
+ HexagonMCInst.cpp
+ HexagonMCTargetDesc.cpp
)
add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 9fc826f..5f9718b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -17,6 +17,9 @@
#ifndef HEXAGONBASEINFO_H
#define HEXAGONBASEINFO_H
+#include "HexagonMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+
namespace llvm {
/// HexagonII - This namespace holds all of the target specific flags that
@@ -28,19 +31,19 @@ namespace HexagonII {
// Insn types.
// *** Must match HexagonInstrFormat*.td ***
enum Type {
- TypePSEUDO = 0,
- TypeALU32 = 1,
- TypeCR = 2,
- TypeJR = 3,
- TypeJ = 4,
- TypeLD = 5,
- TypeST = 6,
- TypeSYSTEM = 7,
- TypeXTYPE = 8,
- TypeMEMOP = 9,
- TypeNV = 10,
- TypePREFIX = 30, // Such as extenders.
- TypeMARKER = 31 // Such as end of a HW loop.
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeENDLOOP = 31 // Such as end of a HW loop.
};
enum SubTarget {
@@ -65,6 +68,14 @@ namespace HexagonII {
BaseRegOffset = 5 // Indirect with register offset
};
+ enum MemAccessSize {
+    NoMemAccess = 0, // Not a memory access instruction.
+ ByteAccess = 1, // Byte access instruction (memb).
+ HalfWordAccess = 2, // Half word access instruction (memh).
+    WordAccess = 3, // Word access instruction (memw).
+ DoubleWordAccess = 4 // Double word access instruction (memd)
+ };
+
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
enum {
@@ -79,46 +90,67 @@ namespace HexagonII {
// Predicated instructions.
PredicatedPos = 6,
PredicatedMask = 0x1,
- PredicatedNewPos = 7,
+ PredicatedFalsePos = 7,
+ PredicatedFalseMask = 0x1,
+ PredicatedNewPos = 8,
PredicatedNewMask = 0x1,
- // Stores that can be newified.
- mayNVStorePos = 8,
+ // New-Value consumer instructions.
+ NewValuePos = 9,
+ NewValueMask = 0x1,
+
+ // New-Value producer instructions.
+ hasNewValuePos = 10,
+ hasNewValueMask = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos = 11,
+ NewValueOpMask = 0x7,
+
+ // Which bits encode the new value.
+ NewValueBitsPos = 14,
+ NewValueBitsMask = 0x3,
+
+ // Stores that can become new-value stores.
+ mayNVStorePos = 16,
mayNVStoreMask = 0x1,
- // Dot new value store instructions.
- NVStorePos = 9,
+ // New-value store instructions.
+ NVStorePos = 17,
NVStoreMask = 0x1,
// Extendable insns.
- ExtendablePos = 10,
+ ExtendablePos = 18,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 11,
+ ExtendedPos = 19,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 12,
+ ExtendableOpPos = 20,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 15,
+ ExtentSignedPos = 23,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 16,
+ ExtentBitsPos = 24,
ExtentBitsMask = 0x1f,
// Valid subtargets
- validSubTargetPos = 21,
+ validSubTargetPos = 29,
validSubTargetMask = 0xf,
- // Addressing mode for load/store instructions
- AddrModePos = 25,
- AddrModeMask = 0xf
+ // Addressing mode for load/store instructions.
+ AddrModePos = 33,
+ AddrModeMask = 0x7,
- };
+ // Access size of memory access instructions (load/store).
+ MemAccessSizePos = 36,
+ MemAccesSizeMask = 0x7
+ };
// *** The code above must match HexagonInstrFormat*.td *** //
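
With the widened layout above, field positions such as AddrModePos = 33 and MemAccessSizePos = 36 sit past bit 31, so TSFlags has to be read as a 64-bit value; every accessor uses the same shift-and-mask pattern. A minimal standalone sketch of that pattern, using the ExtentBits field as the example (the flag value below is invented for illustration):

#include <cassert>
#include <cstdint>

// Position/mask copied from the enum above: ExtentBits occupies bits [24,28].
enum { ExtentBitsPos = 24, ExtentBitsMask = 0x1f };

static unsigned getExtentBits(std::uint64_t TSFlags) {
  return (TSFlags >> ExtentBitsPos) & ExtentBitsMask;
}

int main() {
  std::uint64_t Flags = static_cast<std::uint64_t>(11) << ExtentBitsPos; // an 11-bit extent
  assert(getExtentBits(Flags) == 11);
  return 0;
}
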
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
new file mode 100644
index 0000000..9260b4a
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp
@@ -0,0 +1,175 @@
+//===- HexagonMCInst.cpp - Hexagon sub-class of MCInst --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some Hexagon VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCInst.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+
+using namespace llvm;
+
+// Return the slots used by the insn.
+unsigned HexagonMCInst::getUnits(const HexagonTargetMachine* TM) const {
+ const HexagonInstrInfo* QII = TM->getInstrInfo();
+ const InstrItineraryData* II = TM->getInstrItineraryData();
+ const InstrStage*
+ IS = II->beginStage(QII->get(this->getOpcode()).getSchedClass());
+
+ return (IS->getUnits());
+}
+
+// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInst::getType() const {
+ const uint64_t F = MCID->TSFlags;
+
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
+// Return whether the insn is an actual insn.
+bool HexagonMCInst::isCanon() const {
+ return (!MCID->isPseudo() &&
+ !isPrefix() &&
+ getType() != HexagonII::TypeENDLOOP);
+}
+
+// Return whether the insn is a prefix.
+bool HexagonMCInst::isPrefix() const {
+ return (getType() == HexagonII::TypePREFIX);
+}
+
+// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInst::isSolo() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
+}
+
+// Return whether the insn is a new-value consumer.
+bool HexagonMCInst::isNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
+// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInst::hasNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
+}
+
+// Return the operand that consumes or produces a new value.
+const MCOperand& HexagonMCInst::getNewValue() const {
+ const uint64_t F = MCID->TSFlags;
+ const unsigned O = (F >> HexagonII::NewValueOpPos) &
+ HexagonII::NewValueOpMask;
+ const MCOperand& MCO = getOperand(O);
+
+ assert ((isNewValue() || hasNewValue()) && MCO.isReg());
+ return (MCO);
+}
+
+// Return whether the instruction needs to be constant extended.
+// 1) Always return true if the instruction has 'isExtended' flag set.
+//
+// isExtendable:
+// 2) For extendable immediate operands, return true only if the value is
+// out-of-range.
+// 3) For global address, always return true.
+
+bool HexagonMCInst::isConstExtended(void) const {
+ if (isExtended())
+ return true;
+
+ if (!isExtendable())
+ return false;
+
+ short ExtOpNum = getCExtOpNum();
+ int MinValue = getMinValue();
+ int MaxValue = getMaxValue();
+ const MCOperand& MO = getOperand(ExtOpNum);
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+ // We currently only handle isGlobal() because it is the only kind of
+ // object we are going to end up with here for now.
+ // In the future we probably should add isSymbol(), etc.
+ if (MO.isExpr())
+ return true;
+
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int ImmValue = MO.getImm();
+ return (ImmValue < MinValue || ImmValue > MaxValue);
+}
+
+// Return whether the instruction must be always extended.
+bool HexagonMCInst::isExtended(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+}
+
+// Return true if the instruction may be extended based on the operand value.
+bool HexagonMCInst::isExtendable(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+}
+
+// Return number of bits in the constant extended operand.
+unsigned HexagonMCInst::getBitCount(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
+}
+
+// Return constant extended operand number.
+unsigned short HexagonMCInst::getCExtOpNum(void) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
+}
+
+// Return whether the operand can be constant extended.
+bool HexagonMCInst::isOperandExtended(const unsigned short OperandNum) const {
+ const uint64_t F = MCID->TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
+}
+
+// Return the min value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMinValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return -1 << (bits - 1);
+ else
+ return 0;
+}
+
+// Return the max value that a constant extendable operand can have
+// without being extended.
+int HexagonMCInst::getMaxValue(void) const {
+ const uint64_t F = MCID->TSFlags;
+ unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
+ & HexagonII::ExtentSignedMask;
+ unsigned bits = (F >> HexagonII::ExtentBitsPos)
+ & HexagonII::ExtentBitsMask;
+
+ if (isSigned) // if value is signed
+ return ~(-1 << (bits - 1));
+ else
+ return ~(-1 << bits);
+}
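
The ranges computed by getMinValue()/getMaxValue() are the usual two's-complement bounds for the extent width: a signed N-bit extent covers [-2^(N-1), 2^(N-1)-1] and an unsigned one covers [0, 2^N-1]. A standalone sketch that computes the same bounds, with the bit count passed in directly instead of being decoded from TSFlags:

#include <cassert>

// Same bounds as getMinValue()/getMaxValue() above, written with
// non-negative shifts; valid for extent widths of at most 31 bits.
static int minValue(bool isSigned, unsigned bits) {
  return isSigned ? -(1 << (bits - 1)) : 0;
}
static int maxValue(bool isSigned, unsigned bits) {
  return isSigned ? (1 << (bits - 1)) - 1 : static_cast<int>((1u << bits) - 1);
}

int main() {
  // An 8-bit signed extent covers [-128, 127]; an unsigned one covers [0, 255].
  assert(minValue(true, 8) == -128 && maxValue(true, 8) == 127);
  assert(minValue(false, 8) == 0 && maxValue(false, 8) == 255);
  return 0;
}
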
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
new file mode 100644
index 0000000..3ca71f0
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -0,0 +1,100 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "HexagonTargetMachine.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class HexagonMCInst: public MCInst {
+ // MCID is set during instruction lowering.
+ // It is needed in order to access TSFlags for
+ // use in checking MC instruction properties.
+ const MCInstrDesc *MCID;
+
+ // Packet start and end markers
+ unsigned packetStart: 1, packetEnd: 1;
+
+ public:
+ explicit HexagonMCInst():
+ MCInst(), MCID(0), packetStart(0), packetEnd(0) {};
+ HexagonMCInst(const MCInstrDesc& mcid):
+ MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
+
+ bool isPacketStart() const { return (packetStart); };
+ bool isPacketEnd() const { return (packetEnd); };
+ void setPacketStart(bool Y) { packetStart = Y; };
+ void setPacketEnd(bool Y) { packetEnd = Y; };
+ void resetPacket() { setPacketStart(false); setPacketEnd(false); };
+
+ // Return the slots used by the insn.
+ unsigned getUnits(const HexagonTargetMachine* TM) const;
+
+ // Return the Hexagon ISA class for the insn.
+ unsigned getType() const;
+
+ void setDesc(const MCInstrDesc& mcid) { MCID = &mcid; };
+ const MCInstrDesc& getDesc(void) const { return *MCID; };
+
+ // Return whether the insn is an actual insn.
+ bool isCanon() const;
+
+ // Return whether the insn is a prefix.
+ bool isPrefix() const;
+
+ // Return whether the insn is solo, i.e., cannot be in a packet.
+ bool isSolo() const;
+
+ // Return whether the instruction needs to be constant extended.
+ bool isConstExtended() const;
+
+ // Return constant extended operand number.
+ unsigned short getCExtOpNum(void) const;
+
+ // Return whether the insn is a new-value consumer.
+ bool isNewValue() const;
+
+ // Return whether the instruction is a legal new-value producer.
+ bool hasNewValue() const;
+
+ // Return the operand that consumes or produces a new value.
+ const MCOperand& getNewValue() const;
+
+ // Return number of bits in the constant extended operand.
+ unsigned getBitCount(void) const;
+
+ private:
+ // Return whether the instruction must be always extended.
+ bool isExtended() const;
+
+ // Return true if the insn may be extended based on the operand value.
+ bool isExtendable() const;
+
+ // Return true if the operand can be constant extended.
+ bool isOperandExtended(const unsigned short OperandNum) const;
+
+ // Return the min value that a constant extendable operand can have
+ // without being extended.
+ int getMinValue() const;
+
+ // Return the max value that a constant extendable operand can have
+ // without being extended.
+ int getMaxValue() const;
+ };
+}
+
+#endif
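
The packetStart/packetEnd bits are the only per-packet state the printer keys off: whoever lowers the bundle marks the first instruction with setPacketStart(true) and the last with setPacketEnd(true). A small standalone model of just those two flags (it mirrors the bitfield here but leaves out the rest of the class):

#include <cassert>
#include <vector>

// Minimal stand-in for the packet markers carried by HexagonMCInst.
struct PacketMarkers {
  unsigned packetStart : 1, packetEnd : 1;
  PacketMarkers() : packetStart(0), packetEnd(0) {}
  void setPacketStart(bool Y) { packetStart = Y; }
  void setPacketEnd(bool Y) { packetEnd = Y; }
  bool isPacketStart() const { return packetStart; }
  bool isPacketEnd() const { return packetEnd; }
};

int main() {
  // A three-instruction packet: only the first is the start, only the last
  // is the end; the instruction in between carries neither marker.
  std::vector<PacketMarkers> Packet(3);
  Packet.front().setPacketStart(true);
  Packet.back().setPacketEnd(true);

  assert(Packet[0].isPacketStart() && !Packet[0].isPacketEnd());
  assert(!Packet[1].isPacketStart() && !Packet[1].isPacketEnd());
  assert(Packet[2].isPacketEnd() && !Packet[2].isPacketStart());
  return 0;
}
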
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 737789b..6b1d2d1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,11 +13,13 @@
#include "HexagonMCTargetDesc.h"
#include "HexagonMCAsmInfo.h"
+#include "InstPrinter/HexagonInstPrinter.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index f3a9c1c..c06e8bc 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 2ab163e..ad495ff 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -451,7 +451,7 @@ MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::Identifier:
- if (getParser().ParseExpression(EVal))
+ if (getParser().parseExpression(EVal))
return 0;
return MBlazeOperand::CreateImm(EVal, S, E);
@@ -537,10 +537,10 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index b6edbba..172304b 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -426,6 +426,45 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+// Eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
+void MBlazeFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+    // If we have a frame pointer, turn the adjcallstackdown instruction into
+    // an 'addi r1, r1, -<amt>' and the adjcallstackup instruction into
+    // 'addi r1, r1, <amt>'
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(), TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+
void MBlazeFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 01e6578..f4228c5 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -39,6 +39,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 8a9f092..7664c60 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -81,6 +81,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
@@ -1027,15 +1028,17 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
  // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ RetOps.push_back(DAG.getRegister(Reg, MVT::i32));
+
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1048,20 +1051,16 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // If this function is using the interrupt_handler calling convention
- // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
- unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
- : MBlazeISD::Ret;
- unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
- : MBlaze::R15;
- SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+ RetOps.push_back(Flag);
- return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
+ return DAG.getNode(Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 139bf71..f86bc0b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -28,9 +28,9 @@ def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
[SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index ed06cc4..d0fd7dc 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -83,67 +83,21 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-// This function eliminate ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
-void MBlazeRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have a frame pointer, turn the adjcallstackup instruction into a
- // 'addi r1, r1, -<amt>' and the adjcallstackdown instruction into
- // 'addi r1, r1, <amt>'
- MachineInstr *Old = I;
- int Amount = Old->getOperand(0).getImm() + 4;
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- MachineInstr *New;
- if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(-Amount);
- } else {
- assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
- New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
- .addReg(MBlaze::R1).addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
-
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned oi = i == 2 ? 1 : 2;
+ unsigned OFIOperandNum = FIOperandNum == 2 ? 1 : 2;
DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n";
dbgs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
int stackSize = MFI->getStackSize();
int spOffset = MFI->getObjectOffset(FrameIndex);
@@ -159,12 +113,12 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
  // as explained in LowerFormalArguments, detect negative offsets
// and adjust SPOffsets considering the final stack size.
int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
- Offset += MI.getOperand(oi).getImm();
+ Offset += MI.getOperand(OFIOperandNum).getImm();
DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(oi).ChangeToImmediate(Offset);
- MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ MI.getOperand(OFIOperandNum).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
}
void MBlazeRegisterInfo::
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 1d51162..99a2fac 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -50,13 +50,10 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 2e328cb..3c95760 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
- PointerSize = 2;
+ PointerSize = CalleeSaveStackSlotSize = 2;
PrivateGlobalPrefix = ".L";
WeakRefDirective ="\t.weak\t";
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index aef45d8..ae2e556 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -222,13 +222,73 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+void MSP430FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+ unsigned StackAlign = getStackAlignment();
+
+ if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after prologue, turn the
+    // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+    // adjcallstackup instruction into 'add SPW, <amt>'
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
void
MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
// Create a frame entry for the FPW register that must be saved.
- if (TFI->hasFP(MF)) {
+ if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index cb02545..a077dd7 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -35,6 +35,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5a156c1..09cdf32 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -423,15 +423,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -444,16 +437,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(Opc, dl, MVT::Other, Chain);
+ return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size());
}
/// LowerCCCCallTo - functions arguments are copied from virtual regs to
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index f003574..e45780d 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,9 +40,9 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 8f7813a..0b3e9e2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -101,83 +101,18 @@ MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &MSP430::GR16RegClass;
}
-void MSP430RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub SPW, <amt>' and the
- // adjcallstackdown instruction into 'add SPW, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- } else {
- assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
- // factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
- if (Amount)
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(Amount);
- }
-
- if (New) {
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
- } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
- MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
- // The SRW implicit def is dead.
- New->getOperand(3).setIsDead();
-
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -191,7 +126,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += 2; // Skip the saved FPW
// Fold imm into offset
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
if (MI.getOpcode() == MSP430::ADD16ri) {
// This is actually "load effective address" of the stack slot
@@ -199,7 +134,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// expand it into mov + add
MI.setDesc(TII.get(MSP430::MOV16rr));
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
if (Offset == 0)
return;
@@ -216,8 +151,8 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 64a43bc..69cccb2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -42,12 +42,9 @@ public:
const TargetRegisterClass*
getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 085503eb..ade6084 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -84,15 +84,30 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
- int tryParseRegister(StringRef Mnemonic);
+ int tryParseRegister(bool is64BitReg);
bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -107,7 +122,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
- bool parseRelocOperand(const MCExpr *&Res, SMLoc &E);
+ bool parseRelocOperand(const MCExpr *&Res);
bool parseDirectiveSet();
@@ -118,6 +133,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool isMips64() const {
@@ -128,9 +145,11 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
- int matchRegisterName(StringRef Symbol);
+ int matchRegisterName(StringRef Symbol, bool is64BitReg);
- int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic);
+ int matchCPURegisterName(StringRef Symbol);
+
+ int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
void setFpFormat(FpFormatTy Format) {
FpFormat = Format;
@@ -146,7 +165,7 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
- unsigned getATReg();
+ int getATReg();
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -166,6 +185,20 @@ namespace {
/// instruction.
class MipsOperand : public MCParsedAsmOperand {
+public:
+ enum RegisterKind {
+ Kind_None,
+ Kind_CPURegs,
+ Kind_CPU64Regs,
+ Kind_HWRegs,
+ Kind_HW64Regs,
+ Kind_FGR32Regs,
+ Kind_FGR64Regs,
+ Kind_AFGR64Regs,
+ Kind_CCRRegs
+ };
+
+private:
enum KindTy {
k_CondCode,
k_CoprocNum,
@@ -186,6 +219,7 @@ class MipsOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
+ RegisterKind Kind;
} Reg;
struct {
@@ -246,6 +280,11 @@ public:
return Reg.RegNum;
}
+ void setRegKind(RegisterKind RegKind) {
+ assert((Kind == k_Register) && "Invalid access!");
+ Reg.Kind = RegKind;
+ }
+
const MCExpr *getImm() const {
assert((Kind == k_Immediate) && "Invalid access!");
return Imm.Val;
@@ -296,6 +335,45 @@ public:
return Op;
}
+ bool isCPURegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPURegs;
+ }
+ void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCPU64RegsAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_CPU64Regs;
+ }
+ void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHWRegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HWRegs;
+ }
+ void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isHW64RegsAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_HW64Regs;
+ }
+ void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ void addCCRAsmOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(Reg.RegNum));
+ }
+
+ bool isCCRAsm() const {
+ assert((Kind == k_Register) && "Invalid access!");
+ return Reg.Kind == Kind_CCRRegs;
+ }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -344,31 +422,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
if ( 0 <= ImmValue && ImmValue <= 65535) {
// for 0 <= j <= 65535.
// li d,j => ori d,$zero,j
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ( ImmValue < 0 && ImmValue >= -32768) {
// for -32768 <= j < 0.
// li d,j => addiu d,$zero,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
// for any other value of j that is representable as a 32-bit integer.
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
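For a 32-bit value that fits neither the ori form (0..65535) nor the addiu form (-32768..-1), the expansion above splits li into a lui of the upper half followed by an ori of the lower half. A small standalone sketch of that split in plain C++ (the immediate is made up; this is not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Imm = 0x12345678;                          // hypothetical li immediate
  uint16_t Hi = (uint32_t(Imm) & 0xffff0000) >> 16;  // lui operand
  uint16_t Lo = uint32_t(Imm) & 0xffff;              // ori operand
  uint32_t Rebuilt = (uint32_t(Hi) << 16) | Lo;      // value left in the register
  std::printf("lui 0x%04x ; ori 0x%04x -> 0x%08x\n", Hi, Lo, Rebuilt);
  return 0;
}

No carry correction is needed here because ori zero-extends its immediate; contrast this with the %hi/%lo handling in parseRelocOperand further down, where the low half is sign-extended.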
@@ -390,7 +468,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
if ( -32768 <= ImmValue && ImmValue <= 65535) {
//for -32768 <= j <= 65535.
//la d,j(s) => addiu d,s,j
- tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files?
+ tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
@@ -400,12 +478,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
//la d,j(s) => lui d,hi16(j)
// ori d,d,lo16(j)
// addu d,d,s
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -433,19 +511,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(
- MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO));
+ MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
//for any other value of j that is representable as a 32-bit integer.
//la d,j => lui d,hi16(j)
// ori d,d,lo16(j)
- tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi);
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
tmpInst.clear();
- tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi);
+ tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
@@ -498,84 +576,72 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-int MipsAsmParser::matchRegisterName(StringRef Name) {
-
+int MipsAsmParser::matchCPURegisterName(StringRef Name) {
int CC;
- if (!isMips64())
+
+ if (Name == "at")
+ return getATReg();
+
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO)
- .Case("a0", Mips::A0)
- .Case("a1", Mips::A1)
- .Case("a2", Mips::A2)
- .Case("a3", Mips::A3)
- .Case("v0", Mips::V0)
- .Case("v1", Mips::V1)
- .Case("s0", Mips::S0)
- .Case("s1", Mips::S1)
- .Case("s2", Mips::S2)
- .Case("s3", Mips::S3)
- .Case("s4", Mips::S4)
- .Case("s5", Mips::S5)
- .Case("s6", Mips::S6)
- .Case("s7", Mips::S7)
- .Case("k0", Mips::K0)
- .Case("k1", Mips::K1)
- .Case("sp", Mips::SP)
- .Case("fp", Mips::FP)
- .Case("gp", Mips::GP)
- .Case("ra", Mips::RA)
- .Case("t0", Mips::T0)
- .Case("t1", Mips::T1)
- .Case("t2", Mips::T2)
- .Case("t3", Mips::T3)
- .Case("t4", Mips::T4)
- .Case("t5", Mips::T5)
- .Case("t6", Mips::T6)
- .Case("t7", Mips::T7)
- .Case("t8", Mips::T8)
- .Case("t9", Mips::T9)
- .Case("at", Mips::AT)
- .Case("fcc0", Mips::FCC0)
- .Default(-1);
- else
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
+
+ // Although the SGI documentation just cuts t0-t3 out for n32/n64,
+ // GNU maps the values of t0-t3 over the o32/o64 values of t4-t7.
+ // We support both cases, so for t0-t3 we simply shift them up to t4-t7.
+ if (isMips64() && 8 <= CC && CC <= 11)
+ CC += 4;
+
+ if (CC == -1 && isMips64())
CC = StringSwitch<unsigned>(Name)
- .Case("zero", Mips::ZERO_64)
- .Case("at", Mips::AT_64)
- .Case("v0", Mips::V0_64)
- .Case("v1", Mips::V1_64)
- .Case("a0", Mips::A0_64)
- .Case("a1", Mips::A1_64)
- .Case("a2", Mips::A2_64)
- .Case("a3", Mips::A3_64)
- .Case("a4", Mips::T0_64)
- .Case("a5", Mips::T1_64)
- .Case("a6", Mips::T2_64)
- .Case("a7", Mips::T3_64)
- .Case("t4", Mips::T4_64)
- .Case("t5", Mips::T5_64)
- .Case("t6", Mips::T6_64)
- .Case("t7", Mips::T7_64)
- .Case("s0", Mips::S0_64)
- .Case("s1", Mips::S1_64)
- .Case("s2", Mips::S2_64)
- .Case("s3", Mips::S3_64)
- .Case("s4", Mips::S4_64)
- .Case("s5", Mips::S5_64)
- .Case("s6", Mips::S6_64)
- .Case("s7", Mips::S7_64)
- .Case("t8", Mips::T8_64)
- .Case("t9", Mips::T9_64)
- .Case("kt0", Mips::K0_64)
- .Case("kt1", Mips::K1_64)
- .Case("gp", Mips::GP_64)
- .Case("sp", Mips::SP_64)
- .Case("fp", Mips::FP_64)
- .Case("s8", Mips::FP_64)
- .Case("ra", Mips::RA_64)
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
.Default(-1);
+ return CC;
+}
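The symbolic names now resolve to plain GPR numbers, and matchRegisterByNumber then picks the 32- or 64-bit register class. A standalone sketch of the name-to-number step for the t registers only (plain C++, not the StringSwitch code above):

#include <cstdio>
#include <string>

// Sketch: o32 "t0".."t3" are GPRs 8-11; under n32/n64 the parser shifts them
// up by four so they land on the registers that GNU as calls t4-t7 (12-15).
static int cpuRegNumberSketch(const std::string &Name, bool IsMips64) {
  int CC = (Name == "t0") ? 8 : (Name == "t1") ? 9
         : (Name == "t2") ? 10 : (Name == "t3") ? 11 : -1;
  if (IsMips64 && CC >= 8 && CC <= 11)
    CC += 4;
  return CC;
}

int main() {
  std::printf("o32 t0 -> $%d, n64 t0 -> $%d\n",
              cpuRegNumberSketch("t0", false), cpuRegNumberSketch("t0", true));
  return 0;
}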
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+
+ int CC;
+ CC = matchCPURegisterName(Name);
if (CC != -1)
- return CC;
+ return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID :
+ Mips::CPURegsRegClassID);
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
@@ -639,75 +705,49 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-unsigned MipsAsmParser::getATReg() {
- unsigned Reg = Options.getATRegNum();
- if (isMips64())
- return getReg(Mips::CPU64RegsRegClassID,Reg);
-
- return getReg(Mips::CPURegsRegClassID,Reg);
+int MipsAsmParser::getATReg() {
+ return Options.getATRegNum();
}
unsigned MipsAsmParser::getReg(int RC,int RegNo) {
return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
}
-int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) {
-
- if (Mnemonic.lower() == "rdhwr") {
- // at the moment only hwreg29 is supported
- if (RegNum != 29)
- return -1;
- return Mips::HWR29;
- }
+int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
if (RegNum > 31)
return -1;
- // MIPS64 registers are numbered 1 after the 32-bit equivalents
- return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64();
+ return getReg(RegClass, RegNum);
}
-int MipsAsmParser::tryParseRegister(StringRef Mnemonic) {
+int MipsAsmParser::tryParseRegister(bool is64BitReg) {
const AsmToken &Tok = Parser.getTok();
int RegNum = -1;
if (Tok.is(AsmToken::Identifier)) {
std::string lowerCase = Tok.getString().lower();
- RegNum = matchRegisterName(lowerCase);
+ RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- Mnemonic.lower());
- else
- return RegNum; //error
- // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64
- if (isMips64() && RegNum == Mips::ZERO_64) {
- if (Mnemonic.find("ddiv") != StringRef::npos)
- RegNum = Mips::ZERO;
- }
+ is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
return RegNum;
}
bool MipsAsmParser::
tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic){
+ bool is64BitReg){
SMLoc S = Parser.getTok().getLoc();
- SMLoc E = Parser.getTok().getEndLoc();
int RegNo = -1;
- // FIXME: we should make a more generic method for CCR
- if ((Mnemonic == "cfc1" || Mnemonic == "ctc1")
- && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){
- RegNo = Parser.getTok().getIntVal(); // get the int value
- // at the moment only fcc0 is supported
- if (RegNo == 0)
- RegNo = Mips::FCC0;
- } else
- RegNo = tryParseRegister(Mnemonic);
+ RegNo = tryParseRegister(is64BitReg);
if (RegNo == -1)
return true;
- Operands.push_back(MipsOperand::CreateReg(RegNo, S, E));
+ Operands.push_back(MipsOperand::CreateReg(RegNo, S,
+ Parser.getTok().getLoc()));
Parser.Lex(); // Eat register token.
return false;
}
@@ -734,7 +774,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat dollar token.
// parse register operand
- if (!tryParseRegisterOperand(Operands, Mnemonic)) {
+ if (!tryParseRegisterOperand(Operands, isMips64())) {
if (getLexer().is(AsmToken::LParen)) {
// check if it is indexed addressing operand
Operands.push_back(MipsOperand::CreateToken("(", S));
@@ -743,7 +783,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
Parser.Lex(); // eat dollar
- if (tryParseRegisterOperand(Operands, Mnemonic))
+ if (tryParseRegisterOperand(Operands, isMips64()))
return true;
if (!getLexer().is(AsmToken::RParen))
@@ -757,10 +797,10 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
}
// maybe it is a symbol reference
StringRef Identifier;
- if (Parser.ParseIdentifier(Identifier))
+ if (Parser.parseIdentifier(Identifier))
return true;
- SMLoc E = SMLoc::getFromPointer(Identifier.end());
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
@@ -780,9 +820,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
// quoted label names
const MCExpr *IdVal;
SMLoc S = Parser.getTok().getLoc();
- SMLoc E;
- if (getParser().ParseExpression(IdVal, E))
+ if (getParser().parseExpression(IdVal))
return true;
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return false;
}
@@ -790,10 +830,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
// it is a symbol reference or constant expression
const MCExpr *IdVal;
SMLoc S = Parser.getTok().getLoc(); // start location of the operand
- SMLoc E;
- if (parseRelocOperand(IdVal, E))
+ if (parseRelocOperand(IdVal))
return true;
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return false;
} // case AsmToken::Percent
@@ -801,7 +842,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
}
-bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) {
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
Parser.Lex(); // eat % token
const AsmToken &Tok = Parser.getTok(); // get next token, operation
@@ -813,6 +854,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) {
Parser.Lex(); // eat identifier
// now make expression from the rest of the operand
const MCExpr *IdVal;
+ SMLoc EndLoc;
if (getLexer().getKind() == AsmToken::LParen) {
while (1) {
@@ -830,13 +872,11 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) {
} else
break;
}
- if (getParser().ParseParenExpression(IdVal,EndLoc))
+ if (getParser().parseParenExpression(IdVal,EndLoc))
return true;
- while (getLexer().getKind() == AsmToken::RParen) {
- EndLoc = Parser.getTok().getEndLoc();
+ while (getLexer().getKind() == AsmToken::RParen)
Parser.Lex(); // eat ')' token
- }
} else
return true; // parenthesis must follow reloc operand
@@ -848,7 +888,12 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res, SMLoc &EndLoc) {
if (Str == "lo") {
Val = Val & 0xffff;
} else if (Str == "hi") {
+ int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
+ // The lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate.
+ if (LoSign)
+ Val++;
}
Res = MCConstantExpr::Create(Val, getContext());
return false;
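The extra increment in the %hi case exists because %lo is consumed by instructions that sign-extend their 16-bit immediate (addiu, loads and stores), so when bit 15 of the value is set the low half acts as a negative number and the high half must be bumped by one for hi<<16 plus the signed lo to reproduce the original value. A standalone sketch with a made-up value:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Val = 0x00018000;               // hypothetical symbol value
  int16_t  Lo  = int16_t(Val & 0xffff);    // %lo, sign-extended by the consumer
  uint32_t Hi  = (Val & 0xffff0000) >> 16; // %hi
  if (Val & 0x8000)                        // low half is negative as int16_t
    ++Hi;                                  // compensate, as in the code above
  std::printf("hi=0x%04x lo=%d, hi<<16 + lo = 0x%08x\n",
              Hi, Lo, uint32_t((Hi << 16) + Lo));
  return 0;
}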
@@ -868,23 +913,24 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = Parser.getTok().getLoc();
- EndLoc = Parser.getTok().getEndLoc();
- RegNo = tryParseRegister("");
+ RegNo = tryParseRegister(isMips64());
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
+
+ SMLoc S;
+
switch(getLexer().getKind()) {
default:
return true;
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
- return getParser().ParseExpression(Res);
- case AsmToken::Percent: {
- SMLoc E;
- return parseRelocOperand(Res, E);
- }
+ return (getParser().parseExpression(Res));
+ case AsmToken::Percent:
+ return parseRelocOperand(Res);
case AsmToken::LParen:
return false; // it's probably assuming 0
}
@@ -895,8 +941,9 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
const MCExpr *IdVal = 0;
- SMLoc S = Parser.getTok().getLoc();
- SMLoc E = Parser.getTok().getEndLoc();
+ SMLoc S;
+ // first operand is the offset
+ S = Parser.getTok().getLoc();
if (parseMemOffset(IdVal))
return MatchOperand_ParseFail;
@@ -905,6 +952,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
if (Mnemonic->getToken() == "la") {
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
@@ -917,7 +965,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok1 = Parser.getTok(); // get next token
if (Tok1.is(AsmToken::Dollar)) {
Parser.Lex(); // Eat '$' token.
- if (tryParseRegisterOperand(Operands,"")) {
+ if (tryParseRegisterOperand(Operands, isMips64())) {
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return MatchOperand_ParseFail;
}
@@ -933,7 +981,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_ParseFail;
}
- E = Parser.getTok().getEndLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
Parser.Lex(); // Eat ')' token.
if (IdVal == 0)
@@ -950,6 +999,132 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if (!tryParseRegisterOperand(Operands, true)) {
+ // set the proper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPU64Regs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat $
+ if (!tryParseRegisterOperand(Operands, false)) {
+ // set the proper register kind
+ MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ op->setRegKind(MipsOperand::Kind_CPURegs);
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (isMips64())
+ return MatchOperand_NoMatch;
+
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HWRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned RegNum = Tok.getIntVal();
+ // at the moment only hwreg29 is supported
+ if (RegNum != 29)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_HW64Regs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ unsigned RegNum;
+ // if the first token is not '$' we have an error
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat $
+
+ const AsmToken &Tok = Parser.getTok(); // get next token
+ if (Tok.is(AsmToken::Integer)) {
+ RegNum = Tok.getIntVal();
+ // at the moment only fcc0 is supported
+ if (RegNum != 0)
+ return MatchOperand_ParseFail;
+ } else if (Tok.is(AsmToken::Identifier)) {
+ // at the moment only fcc0 is supported
+ if (Tok.getIdentifier() != "fcc0")
+ return MatchOperand_ParseFail;
+ } else
+ return MatchOperand_NoMatch;
+
+ MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
+ Parser.getTok().getLoc());
+ op->setRegKind(MipsOperand::Kind_CCRRegs);
+ Operands.push_back(op);
+
+ Parser.Lex(); // Eat reg number
+ return MatchOperand_Success;
+}
+
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
MCSymbolRefExpr::VariantKind VK
@@ -1019,13 +1194,13 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Read the first operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1037,14 +1212,14 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1055,16 +1230,18 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ StringRef Mnemonic;
// floating point instructions: should register be treated as double?
if (requestsDoubleOperand(Name)) {
setFpFormat(FP_FORMAT_D);
Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+ Mnemonic = Name;
}
else {
setDefaultFpFormat();
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
- StringRef Mnemonic = Name.slice(Start, Next);
+ Mnemonic = Name.slice(Start, Next);
Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
@@ -1083,8 +1260,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (Cc == -1) {
return Error(NameLoc, "Invalid conditional code");
}
- // FIXME: May include trailing whitespace...
- SMLoc E = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(
+ Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(
MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
} else {
@@ -1104,9 +1281,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Name)) {
+ if (ParseOperand(Operands, Mnemonic)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1116,7 +1293,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -1124,7 +1301,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
@@ -1134,7 +1311,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
@@ -1157,6 +1334,7 @@ bool MipsAsmParser::parseSetAtDirective() {
// line can be
// .set at - defaults to $1
// or .set at=$reg
+ int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
Options.setATReg(1);
@@ -1169,12 +1347,22 @@ bool MipsAsmParser::parseSetAtDirective() {
return false;
}
Parser.Lex(); // eat '$'
- if (getLexer().isNot(AsmToken::Integer)) {
+ const AsmToken &Reg = Parser.getTok();
+ if (Reg.is(AsmToken::Identifier)) {
+ AtRegNo = matchCPURegisterName(Reg.getIdentifier());
+ } else if (Reg.is(AsmToken::Integer)) {
+ AtRegNo = Reg.getIntVal();
+ } else {
reportParseError("unexpected token in statement");
return false;
}
- const AsmToken &Reg = Parser.getTok();
- if (!Options.setATReg(Reg.getIntVal())) {
+
+ if (AtRegNo < 1 || AtRegNo > 31) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ if (!Options.setATReg(AtRegNo)) {
reportParseError("unexpected token in statement");
return false;
}
@@ -1262,55 +1450,88 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "nomips16") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
} else if (Tok.getString() == "nomicromips") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
+
return true;
}
+/// parseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
- if (DirectiveID.getString() == ".ent") {
+ StringRef IDVal = DirectiveID.getString();
+
+ if (IDVal == ".ent") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".end") {
+ if (IDVal == ".end") {
// ignore this directive for now
Parser.Lex();
return false;
}
- if (DirectiveID.getString() == ".frame") {
+ if (IDVal == ".frame") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".set") {
+ if (IDVal == ".set") {
return parseDirectiveSet();
}
- if (DirectiveID.getString() == ".fmask") {
+ if (IDVal == ".fmask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".mask") {
+ if (IDVal == ".mask") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return false;
}
- if (DirectiveID.getString() == ".gpword") {
+ if (IDVal == ".gpword") {
// ignore this directive for now
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ if (IDVal == ".word") {
+ parseDirectiveWord(4, DirectiveID.getLoc());
return false;
}
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index 048ad0d..7101c06 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/Mips/Disassembler/Makefile b/lib/Target/Mips/Disassembler/Makefile
index a78feba..7900373 100644
--- a/lib/Target/Mips/Disassembler/Makefile
+++ b/lib/Target/Mips/Disassembler/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===##
+##===- lib/Target/Mips/Disassembler/Makefile ---------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 9560f3f..025a783 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -93,6 +93,11 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -322,6 +327,15 @@ static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
}
+static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+
+ return MCDisassembler::Fail;
+
+}
+
static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 68d3ac5..fc23cd3 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define PRINT_ALIAS_INSTR
#include "MipsGenAsmWriter.inc"
const char* Mips::MipsFCCToString(Mips::CondCode CC) {
@@ -78,7 +79,9 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\t.set\tmips32r2\n";
}
- printInstruction(MI, O);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
printAnnotation(O, Annot);
switch (MI->getOpcode()) {
@@ -149,6 +152,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
OS << ')';
}
+void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNo).getReg());
+}
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 3d8a6f9..d1b561f 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -87,6 +87,9 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index be5d7e4..4212c94 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,8 @@ add_llvm_library(LLVMMipsDesc
MipsMCCodeEmitter.cpp
MipsMCTargetDesc.cpp
MipsELFObjectWriter.cpp
+ MipsReginfo.cpp
+ MipsELFStreamer.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index f82e203..6471b51 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -42,7 +42,6 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
- virtual unsigned getEFlags() const;
virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -61,19 +60,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
MipsELFObjectWriter::~MipsELFObjectWriter() {}
-// FIXME: get the real EABI Version from the Subtarget class.
-unsigned MipsELFObjectWriter::getEFlags() const {
-
- // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
- unsigned Flag = ELF::EF_MIPS_NOREORDER;
-
- if (is64Bit())
- Flag |= ELF::EF_MIPS_ARCH_64R2;
- else
- Flag |= ELF::EF_MIPS_ARCH_32R2;
- return Flag;
-}
-
const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
const MCValue &Target,
const MCFragment &F,
@@ -108,7 +94,13 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
Type = ELF::R_MIPS_64;
break;
case FK_GPRel_4:
- Type = ELF::R_MIPS_GPREL32;
+ if (isN64()) {
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL32, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_64, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_NONE, Type);
+ }
+ else
+ Type = ELF::R_MIPS_GPREL32;
break;
case Mips::fixup_Mips_GPREL16:
Type = ELF::R_MIPS_GPREL16;
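Under the N64 ABI a single relocation record can carry up to three relocation codes (r_type, r_type2, r_type3), which is what the setRType/setRType2/setRType3 calls compose here for FK_GPRel_4. The helpers themselves are not part of this hunk; the sketch below only illustrates the packing idea under the assumption of one code per byte, with illustrative numeric values:

#include <cstdint>
#include <cstdio>

// Assumption: r_type in byte 0, r_type2 in byte 1, r_type3 in byte 2.
static uint32_t packN64RelocTypes(uint8_t T1, uint8_t T2, uint8_t T3) {
  return uint32_t(T1) | (uint32_t(T2) << 8) | (uint32_t(T3) << 16);
}

int main() {
  const uint8_t R_MIPS_GPREL32 = 12, R_MIPS_64 = 18, R_MIPS_NONE = 0;
  std::printf("composed GPREL32/64/NONE = 0x%06x\n",
              packN64RelocTypes(R_MIPS_GPREL32, R_MIPS_64, R_MIPS_NONE));
  return 0;
}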
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
new file mode 100644
index 0000000..c33bc9a
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -0,0 +1,89 @@
+//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsELFStreamer.h"
+#include "MipsSubtarget.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
+ RelaxAll, NoExecStack);
+ return S;
+ }
+
+ // For llc. Set a group of ELF header flags
+ void
+ MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
+
+ if (hasRawTextSupport())
+ return;
+
+ // Update e_header flags
+ MCAssembler& MCA = getAssembler();
+ unsigned EFlags = MCA.getELFHeaderEFlags();
+
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ MCA.setELFHeaderEFlags(EFlags);
+ }
+
+ // For llc. Set a symbol's STO flags
+ void
+ MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val) {
+
+ if (hasRawTextSupport())
+ return;
+
+ MCSymbolData &Data = getOrCreateSymbolData(Sym);
+ // The "other" values are stored in the last 6 bits of the second byte
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+ }
+
+} // namespace llvm
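A standalone sketch of the flag accumulation emitELFHeaderFlagsCG performs for, say, a mips32r2 O32 PIC target that is not in MIPS16 or microMIPS mode, plus the shift used by emitMipsSTOCG to fit an STO value into the 6-bit "other" field. The ELF constants are copied here for illustration and should be treated as assumptions rather than authoritative:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t EF_MIPS_NOREORDER = 0x00000001;
  const uint32_t EF_MIPS_PIC       = 0x00000002;
  const uint32_t EF_MIPS_ABI_O32   = 0x00001000;
  const uint32_t EF_MIPS_ARCH_32R2 = 0x70000000;

  uint32_t EFlags = 0;
  EFlags |= EF_MIPS_NOREORDER;   // not MIPS16, so NOREORDER is set
  EFlags |= EF_MIPS_ARCH_32R2;   // architecture
  EFlags |= EF_MIPS_ABI_O32;     // ABI
  EFlags |= EF_MIPS_PIC;         // relocation model

  uint8_t STO = 0xf0;            // e.g. a MIPS16 symbol attribute (illustrative)
  std::printf("e_flags = 0x%08x, st_other bits = 0x%02x\n", EFlags, STO >> 2);
  return 0;
}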
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
new file mode 100644
index 0000000..b10ccc7
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -0,0 +1,43 @@
+//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+#ifndef MIPSELFSTREAMER_H_
+#define MIPSELFSTREAMER_H_
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MipsAsmPrinter;
+class MipsSubtarget;
+class MCSymbol;
+
+class MipsELFStreamer : public MCELFStreamer {
+public:
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack)
+ : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
+ }
+
+ ~MipsELFStreamer() {}
+ void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
+ void emitMipsSTOCG(const MipsSubtarget &Subtarget,
+ MCSymbol *Sym,
+ unsigned Val);
+
+ static bool classof(const MCStreamer *S) {
+ return S->getKind() == SK_MipsELFStreamer;
+ }
+};
+
+ MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
+}
+
+#endif /* MIPSELFSTREAMER_H_ */
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index a679749..5d4b32d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -25,8 +25,9 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
if ((TheTriple.getArch() == Triple::mips64el) ||
- (TheTriple.getArch() == Triple::mips64))
- PointerSize = 8;
+ (TheTriple.getArch() == Triple::mips64)) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 4b68b7e..96f93a0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -38,7 +38,8 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
bool IsLittleEndian;
public:
- MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) :
+ MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
+ const MCSubtargetInfo &sti, bool IsLittle) :
MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
@@ -95,7 +96,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, Ctx, false);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, false);
}
MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
@@ -103,7 +104,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx)
{
- return new MipsMCCodeEmitter(MCII, Ctx, true);
+ return new MipsMCCodeEmitter(MCII, Ctx, STI, true);
}
/// EncodeInstruction - Emit the instruction.
@@ -141,12 +142,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
llvm_unreachable("unimplemented opcode in EncodeInstruction()");
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
- uint64_t TSFlags = Desc.TSFlags;
-
- // Pseudo instructions don't get encoded and shouldn't be here
- // in the first place!
- if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
- llvm_unreachable("Pseudo opcode found in EncodeInstruction()");
// Get byte count of instruction
unsigned Size = Desc.getSize();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 9360971..be83b54 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsMCAsmInfo.h"
@@ -131,7 +132,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeMipsTargetMC() {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
new file mode 100644
index 0000000..1dc9bcb
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp
@@ -0,0 +1,80 @@
+//===-- MipsReginfo.cpp - Registerinfo handling --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// .reginfo
+// Elf32_Word ri_gprmask
+// Elf32_Word ri_cprmask[4]
+// Elf32_Word ri_gp_value
+//
+// .MIPS.options - N64
+// Elf64_Byte kind (ODK_REGINFO)
+// Elf64_Byte size (40 bytes)
+// Elf64_Section section (0)
+// Elf64_Word info (unused)
+// Elf64_Word ri_gprmask ()
+// Elf64_Word ri_pad ()
+// Elf64_Word[4] ri_cprmask ()
+// Elf64_Addr ri_gp_value ()
+//
+// .MIPS.options - N32
+// Elf32_Byte kind (ODK_REGINFO)
+// Elf32_Byte size (36 bytes)
+// Elf32_Section section (0)
+// Elf32_Word info (unused)
+// Elf32_Word ri_gprmask ()
+// Elf32_Word ri_pad ()
+// Elf32_Word[4] ri_cprmask ()
+// Elf32_Addr ri_gp_value ()
+//
+//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsReginfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetObjectFile.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+// Integrated assembler version
+void
+MipsReginfo::emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const
+{
+
+ if (OS.hasRawTextSupport())
+ return;
+
+ const MipsTargetObjectFile &TLOFELF =
+ static_cast<const MipsTargetObjectFile &>(TLOF);
+ OS.SwitchSection(TLOFELF.getReginfoSection());
+
+ // .reginfo
+ if (MST.isABI_O32()) {
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 4); // ri_gp_value
+ }
+ // .MIPS.options
+ else if (MST.isABI_N64()) {
+ OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(40, 1); // size
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
+ }
+ else llvm_unreachable("Unsupported abi for reginfo");
+}
+
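The layouts documented at the top of MipsReginfo.cpp can be mirrored as plain structs, which also double-checks the byte counts emitted above (six 4-byte words for .reginfo under O32, and the 40-byte ODK_REGINFO entry under N64). The structs below are a sketch for illustration only; field names follow the comment and are not part of the patch:

#include <cstdint>
#include <cstdio>

struct RegInfoO32 {            // .reginfo section
  uint32_t ri_gprmask;
  uint32_t ri_cprmask[4];
  uint32_t ri_gp_value;
};

struct OdkRegInfoN64 {         // .MIPS.options ODK_REGINFO entry
  uint8_t  kind;               // ODK_REGINFO (1)
  uint8_t  size;               // 40
  uint16_t section;            // 0
  uint32_t info;               // unused
  uint32_t ri_gprmask;
  uint32_t ri_pad;
  uint32_t ri_cprmask[4];
  uint64_t ri_gp_value;
};

static_assert(sizeof(RegInfoO32) == 24, "matches the six EmitIntValue(0, 4) calls");
static_assert(sizeof(OdkRegInfoN64) == 40, "matches the documented 40-byte size");

int main() {
  std::printf(".reginfo: %zu bytes, ODK_REGINFO: %zu bytes\n",
              sizeof(RegInfoO32), sizeof(OdkRegInfoN64));
  return 0;
}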
diff --git a/lib/Target/Mips/MCTargetDesc/MipsReginfo.h b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
new file mode 100644
index 0000000..039b8ea
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsReginfo.h
@@ -0,0 +1,31 @@
+//=== MipsReginfo.h - MipsReginfo -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGINFO_H
+#define MIPSREGINFO_H
+
+namespace llvm {
+ class MCStreamer;
+ class TargetLoweringObjectFile;
+ class MipsSubtarget;
+
+ class MipsReginfo {
+ void anchor();
+ public:
+ MipsReginfo() {}
+
+ void emitMipsReginfoSectionCG(MCStreamer &OS,
+ const TargetLoweringObjectFile &TLOF,
+ const MipsSubtarget &MST) const;
+ };
+
+} // namespace llvm
+
+#endif
+
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 23e2a94..1326623 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -80,6 +80,9 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
+ "microMips mode">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 127fcf2..1bb6fe4 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -139,6 +139,25 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void Mips16FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ const Mips16InstrInfo &TII =
+ *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 01db71e..25f4ffb 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -27,6 +27,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 61602b6..4ff62ef 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -29,45 +29,13 @@
//
//===----------------------------------------------------------------------===//
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-//
-class Format16<bits<5> val> {
- bits<5> Value = val;
-}
-
-def Pseudo16 : Format16<0>;
-def FrmI16 : Format16<1>;
-def FrmRI16 : Format16<2>;
-def FrmRR16 : Format16<3>;
-def FrmRRI16 : Format16<4>;
-def FrmRRR16 : Format16<5>;
-def FrmRRI_A16 : Format16<6>;
-def FrmSHIFT16 : Format16<7>;
-def FrmI8_TYPE16 : Format16<8>;
-def FrmI8_MOVR3216 : Format16<9>;
-def FrmI8_MOV32R16 : Format16<10>;
-def FrmI8_SVRS16 : Format16<11>;
-def FrmJAL16 : Format16<12>;
-def FrmJALX16 : Format16<13>;
-def FrmEXT_I16 : Format16<14>;
-def FrmASMACRO16 : Format16<15>;
-def FrmEXT_RI16 : Format16<16>;
-def FrmEXT_RRI16 : Format16<17>;
-def FrmEXT_RRI_A16 : Format16<18>;
-def FrmEXT_SHIFT16 : Format16<19>;
-def FrmEXT_I816 : Format16<20>;
-def FrmEXT_I8_SVRS16 : Format16<21>;
-def FrmOther16 : Format16<22>; // Instruction w/ a custom format
// Base class for Mips 16 Format
// This class does not depend on the instruction size
//
class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>: Instruction
+ InstrItinClass itin>: Instruction
{
- Format16 Form = f;
let Namespace = "Mips";
@@ -78,14 +46,6 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
- //
- // Attributes specific to Mips instructions...
- //
- bits<5> FormBits = Form.Value;
-
- // TSFlags layout should be kept in sync with MipsInstrInfo.h.
- let TSFlags{4-0} = FormBits;
-
let Predicates = [InMips16Mode];
}
@@ -93,30 +53,35 @@ class MipsInst16_Base<dag outs, dag ins, string asmstr, list<dag> pattern,
// Generic Mips 16 Format
//
class MipsInst16<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<16> Inst;
bits<5> Opcode = 0;
// Top 5 bits are the 'opcode' field
let Inst{15-11} = Opcode;
+
+ let Size = 2;
+ field bits<16> SoftFail = 0;
}
//
// For 32 bit extended instruction forms.
//
class MipsInst16_32<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_Base<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_Base<outs, ins, asmstr, pattern, itin>
{
field bits<32> Inst;
-
+
+ let Size = 4;
+ field bits<32> SoftFail = 0;
}
class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format16 f>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, f>
+ InstrItinClass itin>:
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
let Inst{31-27} = 0b11110;
}
@@ -125,7 +90,7 @@ class MipsInst16_EXTEND<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips Pseudo Instructions Format
class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst16<outs, ins, asmstr, pattern, IIPseudo, Pseudo16> {
+ MipsInst16<outs, ins, asmstr, pattern, IIPseudo> {
let isCodeGenOnly = 1;
let isPseudo = 1;
}
@@ -137,7 +102,7 @@ class MipsPseudo16<dag outs, dag ins, string asmstr, list<dag> pattern>:
class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<11> imm11;
@@ -152,7 +117,7 @@ class FI16<bits<5> op, dag outs, dag ins, string asmstr, list<dag> pattern,
class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<8> imm8;
@@ -169,7 +134,7 @@ class FRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -188,7 +153,7 @@ class FRR16<bits<5> _funct, dag outs, dag ins, string asmstr,
//
class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
string asmstr, list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> subfunct;
@@ -208,7 +173,7 @@ class FRR_SF16<bits<5> _funct, bits<3> _subfunct, dag outs, dag ins,
//
class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<6> _code; // code is a keyword in tablegen
bits<5> funct;
@@ -226,7 +191,7 @@ class FC16<bits<5> _funct, dag outs, dag ins, string asmstr,
class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<1> nd;
@@ -252,7 +217,7 @@ class FRR16_JALRC<bits<1> _nd, bits<1> _l, bits<1> r_a,
class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -272,7 +237,7 @@ class FRRI16<bits<5> op, dag outs, dag ins, string asmstr,
class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRR16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -294,7 +259,7 @@ class FRRR16<bits<2> _f, dag outs, dag ins, string asmstr,
class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmRRI_A16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -316,7 +281,7 @@ class FRRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmSHIFT16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> rx;
bits<3> ry;
@@ -338,7 +303,7 @@ class FSHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_TYPE16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
bits<8> imm8;
@@ -356,7 +321,7 @@ class FI816<bits<3> _func, dag outs, dag ins, string asmstr,
class FI8_MOVR3216<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOVR3216>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<4> ry;
@@ -378,7 +343,7 @@ class FI8_MOVR3216<dag outs, dag ins, string asmstr,
class FI8_MOV32R16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_MOV32R16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<3> func;
@@ -402,7 +367,7 @@ class FI8_MOV32R16<dag outs, dag ins, string asmstr,
class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16<outs, ins, asmstr, pattern, itin>
{
bits<1> s;
bits<1> ra = 0;
@@ -429,7 +394,7 @@ class FI8_SVRS16<bits<1> _s, dag outs, dag ins, string asmstr,
class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_32<outs, ins, asmstr, pattern, itin, FrmJAL16>
+ MipsInst16_32<outs, ins, asmstr, pattern, itin>
{
bits<1> X;
bits<26> imm26;
@@ -452,7 +417,7 @@ class FJAL16<bits<1> _X, dag outs, dag ins, string asmstr,
class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> eop;
@@ -474,7 +439,7 @@ class FEXT_I16<bits<5> _eop, dag outs, dag ins, string asmstr,
class FASMACRO16<dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmASMACRO16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> select;
bits<3> p4;
@@ -503,7 +468,7 @@ class FASMACRO16<dag outs, dag ins, string asmstr,
class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> op;
@@ -527,7 +492,7 @@ class FEXT_RI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<5> op;
bits<16> imm16;
@@ -552,7 +517,7 @@ class FEXT_RRI16<bits<5> _op, dag outs, dag ins, string asmstr,
class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_RRI_A16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<15> imm15;
bits<3> rx;
@@ -578,7 +543,7 @@ class FEXT_RRI_A16<bits<1> _f, dag outs, dag ins, string asmstr,
class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_SHIFT16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<6> sa6;
bits<3> rx;
@@ -605,7 +570,7 @@ class FEXT_SHIFT16<bits<2> _f, dag outs, dag ins, string asmstr,
class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmEXT_I816>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<16> imm16;
bits<5> I8;
@@ -630,7 +595,7 @@ class FEXT_I816<bits<3> _funct, dag outs, dag ins, string asmstr,
class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin, FrmI8_SVRS16>
+ MipsInst16_EXTEND<outs, ins, asmstr, pattern, itin>
{
bits<3> xsregs =0;
bits<8> framesize =0;
@@ -659,5 +624,3 @@ class FEXT_I8_SVRS16<bits<1> s_, dag outs, dag ins, string asmstr,
}
-
-
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 91b5ba0..fd3cc8f 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -19,7 +19,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -28,7 +30,8 @@ using namespace llvm;
static cl::opt<bool> NeverUseSaveRestore(
"mips16-never-use-save-restore",
cl::init(false),
- cl::desc("For testing ability to adjust stack pointer without save/restore instruction"),
+ cl::desc("For testing ability to adjust stack pointer "
+ "without save/restore instruction"),
cl::Hidden);
@@ -129,7 +132,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
-
switch(MI->getDesc().getOpcode()) {
default:
return false;
@@ -169,19 +171,20 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
}
// Adjust SP by FrameSize bytes. Save RA, S0, S1
-void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
+void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (!NeverUseSaveRestore) {
if (isUInt<11>(FrameSize))
BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize);
else {
- int Base = 2040; // should create template function like isUInt that returns largest
- // possible n bit unsigned integer
+ int Base = 2040; // should create template function like isUInt that
+ // returns largest possible n bit unsigned integer
int64_t Remainder = FrameSize - Base;
BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base);
if (isInt<16>(-Remainder))
- BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(-Remainder);
+ BuildAddiuSpImm(MBB, I, -Remainder);
else
adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1);
}
@@ -193,13 +196,16 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc
// sw s1, -8[sp]
// sw s0, -12[sp]
- MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::RA);
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::RA);
MIB1.addReg(Mips::SP);
MIB1.addImm(-4);
- MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S1);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S1);
MIB2.addReg(Mips::SP);
MIB2.addImm(-8);
- MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S0);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16),
+ Mips::S0);
MIB3.addReg(Mips::SP);
MIB3.addImm(-12);
adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1);
@@ -207,18 +213,19 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBloc
}
// Adjust SP by FrameSize bytes. Restore RA, S0, S1
-void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (!NeverUseSaveRestore) {
if (isUInt<11>(FrameSize))
BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize);
else {
- int Base = 2040; // should create template function like isUInt that returns largest
- // possible n bit unsigned integer
+ int Base = 2040; // should create template function like isUInt that
+ // returns largest possible n bit unsigned integer
int64_t Remainder = FrameSize - Base;
if (isInt<16>(Remainder))
- BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder);
+ BuildAddiuSpImm(MBB, I, Remainder);
else
adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base);
@@ -229,15 +236,19 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB
// lw ra, -4[sp]
// lw s1, -8[sp]
// lw s0, -12[sp]
- MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::A0);
+ MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::A0);
MIB1.addReg(Mips::SP);
MIB1.addImm(-4);
- MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::RA);
+ MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::RA);
MIB0.addReg(Mips::A0);
- MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S1);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S1);
MIB2.addReg(Mips::SP);
MIB2.addImm(-8);
- MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S0);
+ MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16),
+ Mips::S0);
MIB3.addReg(Mips::SP);
MIB3.addImm(-12);
}
@@ -245,10 +256,12 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicB
}
// Adjust SP by Amount bytes where bytes can be up to 32bit number.
-// This can only be called at times that we know that there is at least one free register.
+// This can only be called at times that we know that there is at least one free
+// register.
// This is clearly safe at prologue and epilogue.
//
-void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Reg1, unsigned Reg2) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
@@ -269,11 +282,13 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasi
MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1);
MIB3.addReg(Reg1);
MIB3.addReg(Reg2, RegState::Kill);
- MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::SP);
+ MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16),
+ Mips::SP);
MIB4.addReg(Reg1, RegState::Kill);
}
-void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
assert(false && "adjust stack pointer amount exceeded");
}
@@ -282,9 +297,8 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount,
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
- BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Amount);
+ BuildAddiuSpImm(MBB, I, Amount);
else
adjustStackPtrBigUnrestricted(SP, Amount, MBB, I);
}
@@ -292,11 +306,79 @@ void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
/// This function generates the sequence of instructions needed to get the
/// result of adding register REG and immediate IMM.
unsigned
-Mips16InstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+Mips16InstrInfo::loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, DebugLoc DL,
- unsigned *NewImm) const {
+ unsigned &NewImm) const {
+ //
+ // given original instruction is:
+ // Instr rx, T[offset] where offset is too big.
+ //
+ // lo = offset & 0xFFFF
+ // hi = ((offset >> 16) + (lo >> 15)) & 0xFFFF;
+ //
+ // let T = temporary register
+ // li T, hi
+ // shl T, 16
+ // add T, Rx, T
+ //
+ RegScavenger rs;
+ int32_t lo = Imm & 0xFFFF;
+ int32_t hi = ((Imm >> 16) + (lo >> 15)) & 0xFFFF;
+ NewImm = lo;
+ unsigned Reg =0;
+ unsigned SpReg = 0;
+ rs.enterBasicBlock(&MBB);
+ rs.forward(II);
+ //
+ // we use T0 for the first register, if we need to save something away.
+ // we use T1 for the second register, if we need to save something away.
+ //
+ unsigned FirstRegSaved =0, SecondRegSaved=0;
+ unsigned FirstRegSavedTo = 0, SecondRegSavedTo = 0;
+
+ Reg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (Reg == 0) {
+ FirstRegSaved = Reg = Mips::V0;
+ FirstRegSavedTo = Mips::T0;
+ copyPhysReg(MBB, II, DL, FirstRegSavedTo, FirstRegSaved, true);
+ }
+ else
+ rs.setUsed(Reg);
+ BuildMI(MBB, II, DL, get(Mips::LiRxImmX16), Reg).addImm(hi);
+ BuildMI(MBB, II, DL, get(Mips::SllX16), Reg).addReg(Reg).
+ addImm(16);
+ if (FrameReg == Mips::SP) {
+ SpReg = rs.FindUnusedReg(&Mips::CPU16RegsRegClass);
+ if (SpReg == 0) {
+ if (Reg != Mips::V1) {
+ SecondRegSaved = SpReg = Mips::V1;
+ SecondRegSavedTo = Mips::T1;
+ }
+ else {
+ SecondRegSaved = SpReg = Mips::V0;
+ SecondRegSavedTo = Mips::T0;
+ }
+ copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+ }
+ else
+ rs.setUsed(SpReg);
- return 0;
+ copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
+ BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(SpReg)
+ .addReg(Reg);
+ }
+ else
+ BuildMI(MBB, II, DL, get(Mips:: AdduRxRyRz16), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+ if (FirstRegSaved || SecondRegSaved) {
+ II = llvm::next(II);
+ if (FirstRegSaved)
+ copyPhysReg(MBB, II, DL, FirstRegSaved, FirstRegSavedTo, true);
+ if (SecondRegSaved)
+ copyPhysReg(MBB, II, DL, SecondRegSaved, SecondRegSavedTo, true);
+ }
+ return Reg;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
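The comment block in the new loadImmediate above describes how an oversized offset is split: the low 16 bits become the instruction's new immediate (NewImm), and the high half is biased by the sign bit of the low half so the two pieces recombine exactly once the sequence li/sll/addu has been emitted. A minimal, standalone C++ check of that arithmetic; the sample offset and variable names are illustrative only, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  // Example offset whose low half looks "negative" when sign-extended.
  int32_t Offset = 0x12348000;
  int32_t lo = Offset & 0xFFFF;                          // 0x8000
  int32_t hi = ((Offset >> 16) + (lo >> 15)) & 0xFFFF;   // 0x1234 + carry = 0x1235
  // The emitted sequence is: li hi; sll 16; addu with the frame register.
  // The rewritten instruction then keeps only the sign-extended low half.
  assert((hi << 16) + int32_t(int16_t(lo)) == Offset);
  return 0;
}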
@@ -317,6 +399,20 @@ void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
}
+
+const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
+ if (validSpImm8(Imm))
+ return get(Mips::AddiuSpImm16);
+ else
+ return get(Mips::AddiuSpImmX16);
+}
+
+void Mips16InstrInfo::BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const {
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
+}
+
const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
return new Mips16InstrInfo(TM);
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 3704e25..1cb1dfe 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -77,12 +77,27 @@ public:
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- /// Emit a series of instructions to load an immediate. If NewImm is a
- /// non-NULL parameter, the last instruction is not emitted, but instead
- /// its immediate operand is returned in NewImm.
- unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+ /// Emit a series of instructions to load an immediate.
+ // This is to adjust some FrameReg. We return the new register to be used
+ // in place of FrameReg and the adjusted immediate field (&NewImm)
+ //
+ unsigned loadImmediate(unsigned FrameReg,
+ int64_t Imm, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, DebugLoc DL,
- unsigned *NewImm) const;
+ unsigned &NewImm) const;
+
+ static bool validSpImm8(int offset) {
+ return ((offset & 7) == 0) && isInt<11>(offset);
+ }
+
+ //
+ // build the proper one based on the Imm field
+ //
+
+ const MCInstrDesc& AddiuSpImm(int64_t Imm) const;
+
+ void BuildAddiuSpImm
+ (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
private:
virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
@@ -100,7 +115,6 @@ private:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
-
};
}
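The new validSpImm8/AddiuSpImm pair above selects between the short and the extended SP-adjust encodings: the 16-bit form is only usable when the byte offset is a multiple of 8 and lies in the signed 11-bit range (presumably an 8-bit immediate scaled by 8). A small standalone sketch of that selection, with the predicate restated in plain C++ rather than the isInt<> template:

#include <cstdio>

// Same test as validSpImm8: multiple of 8 and within the signed 11-bit range.
static bool fitsShortSpAdjust(int Offset) {
  return (Offset & 7) == 0 && Offset >= -1024 && Offset <= 1023;
}

int main() {
  const int Offsets[] = {64, 1000, 1024, 36};
  for (int Off : Offsets)
    std::printf("addiu sp, %d\t-> %s\n", Off,
                fitsShortSpAdjust(Off) ? "AddiuSpImm16 (16-bit)"
                                       : "AddiuSpImmX16 (extended)");
  return 0;
}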
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index e8e2f3c..a9e9c52 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -32,18 +32,76 @@ def mem16_ea : Operand<i32> {
}
//
+//
+// I8 instruction format
+//
+
+class FI816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+
+class FI816_SP_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
+
+//
+// RI instruction format
+//
+
+
+class FRI16_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FRI16R_ins<bits<5> op, string asmstr,
+ InstrItinClass itin>:
+ FRI16R_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+
+class F2RI16_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin> {
+ let Constraints = "$rx_ = $rx";
+}
+
+class FRI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm # 16 bit inst"), [], itin>;
+//
// Compare a register and immediate and place result in CC
// Implicit use of T8
//
// EXT-CCRR Instruction format
//
-class FEXT_CCRXI16_ins<bits<5> _op, string asmstr,
- InstrItinClass itin>:
- FEXT_RI16<_op, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
- !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), [], itin> {
+class FEXT_CCRXI16_ins<string asmstr>:
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, "\t$rx, $imm\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
+// JAL and JALX instruction format
+//
+class FJAL16_ins<bits<1> _X, string asmstr,
+ InstrItinClass itin>:
+ FJAL16<_X, (outs), (ins simm20:$imm),
+ !strconcat(asmstr, "\t$imm\n\tnop"),[],
+ itin> {
+ let isCodeGenOnly=1;
+}
//
// EXT-I instruction format
//
@@ -77,10 +135,11 @@ class FEXT_I816_SP_ins<bits<3> _func, string asmstr,
//
// CC-RR Instruction format
//
-class FCCRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), [], itin> {
+class FCCRR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$cc), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$cc, $$t8"), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -96,6 +155,15 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+class FEXT_RI16R_ins_base<bits<5> _op, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs ), (ins CPU16Regs:$rx, simm16:$imm),
+ !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16R_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16R_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
@@ -153,25 +221,25 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
//
// EXT-T8I8
//
-class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
- !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
- !strconcat(asmstr, "\t$imm"))),[], itin> {
+class FEXT_T8I816_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[]> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
// EXT-T8I8I
//
-class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
- InstrItinClass itin>:
- FEXT_I816<_func, (outs),
- (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
- !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
- !strconcat(asmstr, "\t$targ"))), [], itin> {
+class FEXT_T8I8I16_ins<string asmstr, string asmstr2>:
+ MipsPseudo16<(outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), []> {
let isCodeGenOnly=1;
+ let usesCustomInserter = 1;
}
//
@@ -219,9 +287,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
!strconcat(asmstr, "\t$rx, $ry"), [], itin> {
}
-class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
- FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
- !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ;
+class FRR16R_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+ FRR16<f, (outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRRTR16_ins<string asmstr> :
+ MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+ !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), []> ;
//
// maybe refactor but need a $zero as a dummy first parameter
@@ -257,7 +330,7 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
string asmstr, InstrItinClass itin>:
- FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+ FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
!strconcat(asmstr, "\t $rx"), [], itin> ;
//
@@ -296,13 +369,13 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
//
// So this pseudo class only has one operand, i.e. op
//
-class Sel<bits<5> f1, string op, InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rt),
- !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin,
- Pseudo16> {
- let isCodeGenOnly=1;
+class Sel<string op>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rt),
+ !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), []> {
+ //let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -320,16 +393,15 @@ class Sel<bits<5> f1, string op, InstrItinClass itin>:
// move $rd, $rs
//
//
-class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
- CPU16Regs:$rl, simm16:$imm),
- !strconcat(op2,
- !strconcat("\t$rl, $imm\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+class SeliT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+ CPU16Regs:$rl, simm16:$imm),
+ !strconcat(op2,
+ !strconcat("\t$rl, $imm\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -344,16 +416,16 @@ class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
// move $rd, $rs
//
//
-class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
- InstrItinClass itin>:
- MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+class SelT<string op1, string op2>:
+ MipsPseudo16<(outs CPU16Regs:$rd_),
+ (ins CPU16Regs:$rd, CPU16Regs:$rs,
CPU16Regs:$rl, CPU16Regs:$rr),
- !strconcat(op2,
- !strconcat("\t$rl, $rr\n\t",
- !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
- Pseudo16> {
+ !strconcat(op2,
+ !strconcat("\t$rl, $rr\n\t",
+ !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), []> {
let isCodeGenOnly=1;
let Constraints = "$rd = $rd_";
+ let usesCustomInserter = 1;
}
//
@@ -363,7 +435,7 @@ def imm32: Operand<i32>;
def Constant32:
MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
-
+
def LwConstant32:
MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm),
"lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
@@ -401,14 +473,21 @@ class MayStore {
}
//
+
// Format: ADDIU rx, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
// To add a constant to a 32-bit integer.
//
def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+ ArithLogic16Defs<0> {
+ let AddedComplexity = 5;
+}
def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
- ArithLogic16Defs<0>;
+ ArithLogic16Defs<0> {
+ let isCodeGenOnly = 1;
+}
def AddiuRxRyOffMemX16:
FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
@@ -426,11 +505,18 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended)
// To add a constant to the stack pointer.
//
+def AddiuSpImm16
+ : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ let Defs = [SP];
+ let Uses = [SP];
+ let AddedComplexity = 5;
+}
+
def AddiuSpImmX16
: FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
let Defs = [SP];
let Uses = [SP];
-}
+}
//
// Format: ADDU rz, rx, ry MIPS16e
@@ -450,6 +536,14 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
// Purpose: Branch on Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -463,6 +557,13 @@ def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
//
// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
@@ -473,20 +574,22 @@ def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
-def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+def BteqzT8CmpX16: FEXT_T8I816_ins<"bteqz", "cmp">, cbranch16;
-def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<"bteqz", "cmpi">,
cbranch16;
-def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+def BteqzT8SltX16: FEXT_T8I816_ins<"bteqz", "slt">, cbranch16;
-def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+def BteqzT8SltuX16: FEXT_T8I816_ins<"bteqz", "sltu">, cbranch16;
-def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<"bteqz", "slti">, cbranch16;
-def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
cbranch16;
//
@@ -494,22 +597,52 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
// Purpose: Branch on T Not Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16;
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
+ let Uses = [T8];
+}
-def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+def BtnezT8CmpX16: FEXT_T8I816_ins<"btnez", "cmp">, cbranch16;
-def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<"btnez", "cmpi">, cbranch16;
-def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+def BtnezT8SltX16: FEXT_T8I816_ins<"btnez", "slt">, cbranch16;
-def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+def BtnezT8SltuX16: FEXT_T8I816_ins<"btnez", "sltu">, cbranch16;
-def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<"btnez", "slti">, cbranch16;
-def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
cbranch16;
//
+// Format: CMP rx, ry MIPS16e
+// Purpose: Compare
+// To compare the contents of two GPRs.
+//
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: CMPI rx, immediate MIPS16e
+// Purpose: Compare Immediate (Extended)
+// To compare a constant with the contents of a GPR.
+//
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+ let Defs = [T8];
+}
+
+
+//
// Format: DIV rx, ry MIPS16e
// Purpose: Divide Word
// To divide 32-bit signed integers.
@@ -526,7 +659,19 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
let Defs = [HI, LO];
}
+//
+// Format: JAL target MIPS16e
+// Purpose: Jump and Link
+// To execute a procedure call within the current 256 MB-aligned
+// region and preserve the current ISA.
+//
+def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+ let isBranch = 1;
+ let hasDelaySlot = 0; // not true, but we add the nop for now
+ let isTerminator=1;
+ let isBarrier=1;
+}
//
// Format: JR ra MIPS16e
@@ -543,7 +688,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
let isBarrier=1;
}
-def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> {
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
@@ -561,7 +706,9 @@ def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
// Purpose: Load Byte (Extended)
// To load a byte from memory as a signed value.
//
-def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
//
// Format: LBU ry, offset(rx) MIPS16e
@@ -569,14 +716,18 @@ def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad;
// To load a byte from memory as an unsigned value.
//
def LbuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
//
// Format: LH ry, offset(rx) MIPS16e
// Purpose: Load Halfword signed (Extended)
// To load a halfword from memory as a signed value.
//
-def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
//
// Format: LHU ry, offset(rx) MIPS16e
@@ -584,7 +735,16 @@ def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad;
// To load a halfword from memory as an unsigned value.
//
def LhuRxRyOffMemX16:
- FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad;
+ FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad {
+ let isCodeGenOnly = 1;
+}
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate
+// To load a constant into a GPR.
+//
+def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
//
// Format: LI rx, immediate MIPS16e
@@ -598,7 +758,9 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{
+ let isCodeGenOnly = 1;
+}
// Format: LW rx, offset(sp) MIPS16e
// Purpose: Load Word (SP-Relative, Extended)
@@ -779,7 +941,7 @@ def SbRxRyOffMemX16:
// Purpose: if rt==0, do nothing
// else rs = rt
//
-def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
+def SelBeqZ: Sel<"beqz">;
//
// Format: SelTBteqZCmp rd, rs, rl, rr
@@ -787,7 +949,7 @@ def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
+def SelTBteqZCmp: SelT<"bteqz", "cmp">;
//
// Format: SelTBteqZCmpi rd, rs, rl, rr
@@ -795,7 +957,7 @@ def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
+def SelTBteqZCmpi: SeliT<"bteqz", "cmpi">;
//
// Format: SelTBteqZSlt rd, rs, rl, rr
@@ -803,7 +965,7 @@ def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
+def SelTBteqZSlt: SelT<"bteqz", "slt">;
//
// Format: SelTBteqZSlti rd, rs, rl, rr
@@ -811,7 +973,7 @@ def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
+def SelTBteqZSlti: SeliT<"bteqz", "slti">;
//
// Format: SelTBteqZSltu rd, rs, rl, rr
@@ -819,7 +981,7 @@ def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
+def SelTBteqZSltu: SelT<"bteqz", "sltu">;
//
// Format: SelTBteqZSltiu rd, rs, rl, rr
@@ -827,14 +989,14 @@ def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
// If b==0 then do nothing.
// if b!=0 then rd = rs
//
-def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>;
+def SelTBteqZSltiu: SeliT<"bteqz", "sltiu">;
//
// Format: SelBnez rd, rs, rt
// Purpose: if rt!=0, do nothing
// else rs = rt
//
-def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
+def SelBneZ: Sel<"bnez">;
//
// Format: SelTBtneZCmp rd, rs, rl, rr
@@ -842,7 +1004,7 @@ def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
+def SelTBtneZCmp: SelT<"btnez", "cmp">;
//
// Format: SelTBtnezCmpi rd, rs, rl, rr
@@ -850,7 +1012,7 @@ def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
+def SelTBtneZCmpi: SeliT<"btnez", "cmpi">;
//
// Format: SelTBtneZSlt rd, rs, rl, rr
@@ -858,7 +1020,7 @@ def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
+def SelTBtneZSlt: SelT<"btnez", "slt">;
//
// Format: SelTBtneZSlti rd, rs, rl, rr
@@ -866,7 +1028,7 @@ def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
+def SelTBtneZSlti: SeliT<"btnez", "slti">;
//
// Format: SelTBtneZSltu rd, rs, rl, rr
@@ -874,7 +1036,7 @@ def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
+def SelTBtneZSltu: SelT<"btnez", "sltu">;
//
// Format: SelTBtneZSltiu rd, rs, rl, rr
@@ -882,7 +1044,7 @@ def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
// If b!=0 then do nothing.
// if b==0 then rd = rs
//
-def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>;
+def SelTBtneZSltiu: SeliT<"btnez", "sltiu">;
//
//
// Format: SH ry, offset(rx) MIPS16e
@@ -906,39 +1068,78 @@ def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
//
def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
//
// Format: SLTI rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiCCRxImmX16: FEXT_CCRXI16_ins<0b01010, "slti", IIAlu>;
+//
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+ let Defs = [T8];
+}
+
+def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
+// Format: SLTIU rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
+
+//
+// Format: SLTI rx, immediate MIPS16e
+// Purpose: Set on Less Than Immediate Unsigned (Extended)
+// To record the result of a less-than comparison with a constant.
+//
+//
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+ let Defs = [T8];
+}
//
// Format: SLTIU rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate Unsigned (Extended)
// To record the result of a less-than comparison with a constant.
//
-def SltiuCCRxImmX16: FEXT_CCRXI16_ins<0b01011, "sltiu", IIAlu>;
+def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
//
// Format: SLT rx, ry MIPS16e
// Purpose: Set on Less Than
// To record the result of a less-than comparison.
//
-def SltRxRy16: FRR16_ins<0b00010, "slt", IIAlu>;
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{
+ let Defs = [T8];
+}
-def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>;
+def SltCCRxRy16: FCCRR16_ins<"slt">;
// Format: SLTU rx, ry MIPS16e
// Purpose: Set on Less Than Unsigned
// To record the result of an unsigned less-than comparison.
//
-def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> {
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{
+ let Defs = [T8];
+}
+
+def SltuRxRyRz16: FRRTR16_ins<"sltu"> {
let isCodeGenOnly=1;
+ let Defs = [T8];
}
-def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>;
+def SltuCCRxRy16: FCCRR16_ins<"sltu">;
//
// Format: SRAV ry, rx MIPS16e
// Purpose: Shift Word Right Arithmetic Variable
@@ -1034,6 +1235,7 @@ class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
(I CPU16Regs:$in, imm_type:$imm)>;
+def: ArithLogicI16_pat<add, immSExt8, AddiuRxRxImm16>;
def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
@@ -1067,14 +1269,19 @@ def: StoreM16_pat<store, SwRxRyOffMemX16>;
// Unconditional branch
class UncondBranch16_pat<SDNode OpNode, Instruction I>:
Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
- let Predicates = [RelocPIC, InMips16Mode];
+ let Predicates = [InMips16Mode];
}
+def : Mips16Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (Jal16 tglobaladdr:$dst)>;
+
+def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (Jal16 texternalsym:$dst)>;
+
// Indirect branch
def: Mips16Pat<
- (brind CPU16Regs:$rs),
- (JrcRx16 CPU16Regs:$rs)>;
-
+ (brind CPU16Regs:$rs),
+ (JrcRx16 CPU16Regs:$rs)>;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=0 in
@@ -1502,7 +1709,7 @@ def: Mips16Pat
//
def: Mips16Pat
<(setle CPU16Regs:$lhs, CPU16Regs:$rhs),
- (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImmX16 1))>;
+ (XorRxRxRy16 (SltCCRxRy16 CPU16Regs:$rhs, CPU16Regs:$lhs), (LiRxImm16 1))>;
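The pattern just above lowers setle by reusing the slt-based compare: (lhs <= rhs) is computed as (rhs < lhs) xor 1, with the constant 1 now materialized by the short LiRxImm16. A trivial C++ check of that identity; illustration only, not TableGen.

#include <cassert>

int main() {
  const int vals[] = {-3, 0, 2, 7};
  for (int a : vals)
    for (int b : vals)
      // setle == xor(slt(rhs, lhs), 1), with slt producing 0 or 1.
      assert((a <= b) == (((b < a) ? 1 : 0) ^ 1));
  return 0;
}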
//
// setlt
@@ -1562,7 +1769,11 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
// hi/lo relocs
-def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
+def : Mips16Pat<(MipsHi tglobaladdr:$in),
+ (SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
+def : Mips16Pat<(MipsHi tjumptable:$in),
+ (SllX16 (LiRxImmX16 tjumptable:$in), 16)>;
+def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
(SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
// wrapper_pic
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index c2e09a7..0ea9368 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,3 +1,4 @@
+
//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===//
//
// The LLVM Compiler Infrastructure
@@ -12,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "Mips16RegisterInfo.h"
+#include "Mips16InstrInfo.h"
#include "Mips.h"
#include "Mips16InstrInfo.h"
#include "MipsAnalyzeImmediate.h"
@@ -23,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
@@ -69,27 +72,6 @@ bool Mips16RegisterInfo::saveScavengerRegister
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void Mips16RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII);
-
- II->adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
@@ -140,6 +122,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// by adding the size of the stack:
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
+ bool IsKill = false;
Offset = SPOffset + (int64_t)StackSize;
Offset += MI.getOperand(OpNo + 1).getImm();
@@ -148,9 +131,14 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) ||
((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) {
- llvm_unreachable("frame offset does not fit in instruction");
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned NewImm;
+ FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
}
- MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+ MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
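The replaced llvm_unreachable above means eliminateFI now tolerates frame offsets that do not fit in the instruction: loadImmediate folds the high part into a scratch register (which then replaces the frame register and is marked killed) and the operand keeps only the sign-extended low half. The trigger condition, restated as a standalone helper; a plain C++ stand-in for the isInt<15>/isInt<16> tests, not the LLVM API.

#include <cstdint>
#include <cstdio>

// SP-relative MIPS16 forms carry a signed 15-bit byte offset, other frame
// registers a signed 16-bit one; anything outside goes through loadImmediate.
static bool needsLoadImmediate(bool FrameRegIsSP, int64_t Offset) {
  int64_t Limit = FrameRegIsSP ? (1 << 14) : (1 << 15);
  return Offset < -Limit || Offset >= Limit;
}

int main() {
  std::printf("%d %d %d\n",
              needsLoadImmediate(true, 16000),    // 0: fits in 15 bits
              needsLoadImmediate(true, 70000),    // 1: scratch register needed
              needsLoadImmediate(false, 70000));  // 1: too big for 16 bits too
  return 0;
}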
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 6101739..b8f818a 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -25,10 +25,6 @@ public:
Mips16RegisterInfo(const MipsSubtarget &Subtarget,
const Mips16InstrInfo &TII);
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
bool requiresRegisterScavenging(const MachineFunction &MF) const;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index cdf12c8..494ba87 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -71,52 +71,55 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
//===----------------------------------------------------------------------===//
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithLogicI<"daddi", simm16_64, CPU64Regs>, ADDI_FM<0x18>;
-def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64Regs, immSExt16, add>,
+def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>;
+def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>,
ADDI_FM<0x19>, IsAsCheapAsAMove;
-def DANDi : ArithLogicI<"andi", uimm16_64, CPU64Regs, immZExt16, and>,
+def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>,
ADDI_FM<0xc>;
def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>,
SLTI_FM<0xa>;
def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>,
SLTI_FM<0xb>;
-def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64Regs, immZExt16, or>,
+def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>,
ADDI_FM<0xd>;
-def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64Regs, immZExt16, xor>,
+def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>,
ADDI_FM<0xe>;
def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithLogicR<"dadd", CPU64Regs>, ADD_FM<0, 0x2c>;
-def DADDu : ArithLogicR<"daddu", CPU64Regs, 1, IIAlu, add>, ADD_FM<0, 0x2d>;
-def DSUBu : ArithLogicR<"dsubu", CPU64Regs, 0, IIAlu, sub>, ADD_FM<0, 0x2f>;
+def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>;
+def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x2d>;
+def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x2f>;
def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>;
def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>;
-def AND64 : ArithLogicR<"and", CPU64Regs, 1, IIAlu, and>, ADD_FM<0, 0x24>;
-def OR64 : ArithLogicR<"or", CPU64Regs, 1, IIAlu, or>, ADD_FM<0, 0x25>;
-def XOR64 : ArithLogicR<"xor", CPU64Regs, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
-def NOR64 : LogicNOR<"nor", CPU64Regs>, ADD_FM<0, 0x27>;
+def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR64 : LogicNOR<"nor", CPU64RegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def DSLL : shift_rotate_imm<"dsll", shamt, CPU64Regs, shl, immZExt6>,
+def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>,
SRA_FM<0x38, 0>;
-def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64Regs, srl, immZExt6>,
+def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>,
SRA_FM<0x3a, 0>;
-def DSRA : shift_rotate_imm<"dsra", shamt, CPU64Regs, sra, immZExt6>,
+def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>,
SRA_FM<0x3b, 0>;
-def DSLLV : shift_rotate_reg<"dsllv", CPU64Regs, shl>, SRLV_FM<0x14, 0>;
-def DSRLV : shift_rotate_reg<"dsrlv", CPU64Regs, srl>, SRLV_FM<0x16, 0>;
-def DSRAV : shift_rotate_reg<"dsrav", CPU64Regs, sra>, SRLV_FM<0x17, 0>;
-def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64Regs>, SRA_FM<0x3c, 0>;
-def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64Regs>, SRA_FM<0x3e, 0>;
-def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64Regs>, SRA_FM<0x3f, 0>;
+def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>;
+def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>;
+def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>;
}
// Rotate Instructions
let Predicates = [HasMips64r2, HasStdEnc],
DecoderNamespace = "Mips64" in {
- def DROTR : shift_rotate_imm<"drotr", shamt, CPU64Regs, rotr, immZExt6>,
- SRA_FM<0x3a, 1>;
- def DROTRV : shift_rotate_reg<"drotrv", CPU64Regs, rotr>, SRLV_FM<0x16, 1>;
+ def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>,
+ SRA_FM<0x3a, 1>;
+ def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>,
+ SRLV_FM<0x16, 1>;
}
let DecoderNamespace = "Mips64" in {
@@ -135,12 +138,11 @@ defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>;
defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>;
/// load/store left/right
-let isCodeGenOnly = 1 in {
- defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
- defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
- defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
- defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
-}
+defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>;
+defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>;
+defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>;
+defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>;
+
defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>;
defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>;
defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>;
@@ -148,13 +150,13 @@ defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
let Predicates = [NotN64, HasStdEnc] in {
- def LLD : LLBase<"lld", CPU64Regs, mem>, LW_FM<0x34>;
- def SCD : SCBase<"scd", CPU64Regs, mem>, LW_FM<0x3c>;
+ def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>;
+ def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>;
}
let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
- def LLD_P8 : LLBase<"lld", CPU64Regs, mem64>, LW_FM<0x34>;
- def SCD_P8 : SCBase<"scd", CPU64Regs, mem64>, LW_FM<0x3c>;
+ def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>;
+ def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>;
}
/// Jump and Branch Instructions
@@ -168,15 +170,18 @@ def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>;
}
let DecoderNamespace = "Mips64" in
def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM;
+def JALR64Pseudo : JumpLinkRegPseudo<CPU64Regs, JALR64, RA_64>;
def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
let DecoderNamespace = "Mips64" in {
/// Multiply and Divide Instructions.
-def DMULT : Mult<"dmult", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1c>;
-def DMULTu : Mult<"dmultu", IIImul, CPU64Regs, [HI64, LO64]>, MULT_FM<0, 0x1d>;
-def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64Regs, [HI64, LO64]>,
+def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1c>;
+def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>,
+ MULT_FM<0, 0x1d>;
+def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
MULT_FM<0, 0x1e>;
-def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64Regs, [HI64, LO64]>,
+def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>,
MULT_FM<0, 0x1f>;
def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>;
@@ -189,28 +194,28 @@ def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>;
def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def DCLZ : CountLeading0<"dclz", CPU64Regs>, CLO_FM<0x24>;
-def DCLO : CountLeading1<"dclo", CPU64Regs>, CLO_FM<0x25>;
+def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>;
+def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>;
/// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<"dsbh", CPU64Regs>, SEB_FM<2, 0x24>;
-def DSHD : SubwordSwap<"dshd", CPU64Regs>, SEB_FM<5, 0x24>;
+def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>;
+def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>;
def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>;
}
let DecoderNamespace = "Mips64" in {
-def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>, RDHWR_FM;
+def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<"dext", CPU64Regs>, EXT_FM<3>;
+def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>;
let Pattern = []<dag> in {
- def DEXTU : ExtBase<"dextu", CPU64Regs>, EXT_FM<2>;
- def DEXTM : ExtBase<"dextm", CPU64Regs>, EXT_FM<1>;
+ def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>;
+ def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>;
}
-def DINS : InsBase<"dins", CPU64Regs>, EXT_FM<7>;
+def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>;
let Pattern = []<dag> in {
- def DINSU : InsBase<"dinsu", CPU64Regs>, EXT_FM<6>;
- def DINSM : InsBase<"dinsm", CPU64Regs>, EXT_FM<5>;
+ def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>;
+ def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>;
}
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -304,38 +309,60 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>;
+def : InstAlias<"move $dst, $src",
+ (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 0>,
+ Requires<[HasMips64]>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm),
+ 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64), 1>,
+ Requires<[HasMips64]>;
+def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>;
+def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>,
+ Requires<[HasMips64]>;
+def : InstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
+def : InstAlias<"dadd $rs, $rt, $imm",
+ (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm),
+ 1>;
/// Move between CPU and coprocessor registers
+
let DecoderNamespace = "Mips64" in {
-def MFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
- "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
-def MTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
- "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
-def MFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
- "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
-def MTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
- "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
-def DMFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+def DMFC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
"dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
-def DMTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+def DMTC0_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
"dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
-def DMFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel),
+def DMFC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rt),
+ (ins CPU64RegsOpnd:$rd, uimm16:$sel),
"dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
-def DMTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt),
+def DMTC2_3OP64 : MFC3OP<(outs CPU64RegsOpnd:$rd, uimm16:$sel),
+ (ins CPU64RegsOpnd:$rt),
"dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
}
+
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
def : InstAlias<"dmfc0 $rt, $rd",
- (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+ (DMFC0_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
def : InstAlias<"dmtc0 $rt, $rd",
- (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+ (DMTC0_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
def : InstAlias<"dmfc2 $rt, $rd",
- (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>;
+ (DMFC2_3OP64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rd, 0), 0>;
def : InstAlias<"dmtc2 $rt, $rd",
- (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>;
+ (DMTC2_3OP64 CPU64RegsOpnd:$rd, 0, CPU64RegsOpnd:$rt), 0>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 6ad7e96..1876cb6 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
-#include "MipsAsmPrinter.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsELFStreamer.h"
#include "Mips.h"
+#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
#include "llvm/ADT/SmallString.h"
@@ -35,6 +36,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
@@ -65,19 +67,28 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
- return;
-
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
- MCInst TmpInst0;
- MCInstLowering.Lower(I++, TmpInst0);
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, &*I))
+ continue;
+
+ // The inMips16Mode() test is not permanent.
+ // Some instructions are marked as pseudo right now which
+ // would make the test fail for the wrong reason but
+ // that will be fixed soon. We need this here because we are
+ // removing another test for this situation downstream in the
+ // callchain.
+ //
+ if (I->isPseudo() && !Subtarget->inMips16Mode())
+ llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
- } while ((I != E) && I->isInsideBundle()); // Delay slot check
+ } while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
//===----------------------------------------------------------------------===//
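The rewritten loop above lowers every instruction in a bundle (a branch plus its delay-slot filler) individually, and skips auto-expanded pseudos inside the loop instead of before it. A toy C++ sketch of that control flow, with lowering stubbed out by printf and made-up types standing in for MachineInstr and MCInst:

#include <cstdio>
#include <vector>

struct ToyInstr { const char *Name; bool InsideBundle; bool ExpandablePseudo; };

static void emitBundle(std::vector<ToyInstr>::const_iterator I,
                       std::vector<ToyInstr>::const_iterator E) {
  do {
    if (I->ExpandablePseudo)
      continue;                        // 'continue' still runs ++I below
    std::printf("lower + emit %s\n", I->Name);
  } while (++I != E && I->InsideBundle);   // delay-slot check
}

int main() {
  std::vector<ToyInstr> MBB = {{"beq", false, false},
                               {"nop", true, false},    // delay-slot filler
                               {"addu", false, false}}; // next bundle, not emitted
  emitBundle(MBB.cbegin(), MBB.cend());
  return 0;
}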
@@ -221,6 +232,11 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
// OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
}
+
+ if (Subtarget->inMicroMipsMode())
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -236,10 +252,11 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
raw_svector_ostream OS(Str);
printSavedRegsBitmask(OS);
OutStreamer.EmitRawText(OS.str());
-
- OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ }
}
}
@@ -250,9 +267,11 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
// always be at the function end, and we can't emit and
// break with BB logic.
if (OutStreamer.hasRawTextSupport()) {
- OutStreamer.EmitRawText(StringRef("\t.set\tat"));
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ if (!Subtarget->inMips16Mode()) {
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ }
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
}
}
@@ -540,6 +559,18 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// return to previous section
if (OutStreamer.hasRawTextSupport())
OutStreamer.EmitRawText(StringRef("\t.previous"));
+
+}
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+
+ if (OutStreamer.hasRawTextSupport()) return;
+
+ // Emit Mips ELF register info
+ Subtarget->getMReginfo().emitMipsReginfoSectionCG(
+ OutStreamer, getObjFileLowering(), *Subtarget);
+ if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
+ MES->emitELFHeaderFlagsCG(*Subtarget);
}
MachineLocation
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index d8fbeeb..dbdaf26 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -80,6 +80,7 @@ public:
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 52fa95b..df877b6 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
@@ -62,67 +63,73 @@ class MipsCodeEmitter : public MachineFunctionPass {
static char ID;
- public:
- MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
- MachineFunctionPass(ID), JTI(0),
- II((const MipsInstrInfo *) tm.getInstrInfo()),
- TD(tm.getDataLayout()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
- }
+public:
+ MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0),
+ II((const MipsInstrInfo *) tm.getInstrInfo()), TD(tm.getDataLayout()),
+ TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Mips Machine Code Emitter";
- }
+ virtual const char *getPassName() const {
+ return "Mips Machine Code Emitter";
+ }
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- void emitInstruction(const MachineInstr &MI);
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
- private:
+private:
- void emitWord(unsigned Word);
+ void emitWord(unsigned Word);
- /// Routines that handle operands which add machine relocations which are
- /// fixed up by the relocation stage.
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
- void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+ /// Routines that handle operands which add machine relocations which are
+ /// fixed up by the relocation stage.
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+ bool MayNeedFarStub) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- unsigned getRelocation(const MachineInstr &MI,
- const MachineOperand &MO) const;
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
- unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getBranchTargetOpValue(const MachineInstr &MI,
- unsigned OpNo) const;
- unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
- void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
- int Offset) const;
- };
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
+
+ /// \brief Expand pseudo instruction. Return true if MI was expanded.
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const;
+};
}
char MipsCodeEmitter::ID = 0;
bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- JTI = ((MipsTargetMachine&) MF.getTarget()).getJITInfo();
- II = ((const MipsTargetMachine&) MF.getTarget()).getInstrInfo();
- TD = ((const MipsTargetMachine&) MF.getTarget()).getDataLayout();
+ MipsTargetMachine &Target = static_cast<MipsTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
Subtarget = &TM.getSubtarget<MipsSubtarget> ();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -139,8 +146,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
- E = MBB->instr_end(); I != E; ++I)
- emitInstruction(*I);
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
}
} while (MCE.finishFunction(MF));
@@ -265,19 +272,21 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
Reloc, BB));
}
-void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
- DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
-
- MCE.processDebugLoc(MI.getDebugLoc(), true);
+void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
- // Skip pseudo instructions.
- if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ // Expand pseudo instruction. Skip if MI was not expanded.
+ if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) &&
+ !expandPseudos(MI, MBB))
return;
- emitWord(getBinaryCodeForInstr(MI));
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ emitWord(getBinaryCodeForInstr(*MI));
++NumEmitted; // Keep track of the # of mi's emitted
- MCE.processDebugLoc(MI.getDebugLoc(), false);
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
}
void MipsCodeEmitter::emitWord(unsigned Word) {
@@ -289,6 +298,25 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
MCE.emitWordBE(Word);
}
+bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
+ MachineBasicBlock &MBB) const {
+ switch (MI->getOpcode()) {
+ case Mips::NOP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
+ .addReg(Mips::ZERO).addImm(0);
+ break;
+ case Mips::JALRPseudo:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+ default:
+ return false;
+ }
+
+ (MI--)->eraseFromBundle();
+ return true;
+}
+
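The Mips::NOP expansion above relies on the architectural fact that the canonical MIPS nop is the all-zero word, which is exactly the encoding of sll $zero, $zero, 0. A minimal standalone sketch of that encoding arithmetic (plain C++, illustrative only; encodeSLL is a made-up helper, not part of the patch):

#include <cassert>
#include <cstdint>

// R-type layout: opcode(6) | rs(5) | rt(5) | rd(5) | shamt(5) | funct(6).
// SLL has opcode 0 and funct 0, so "sll $zero, $zero, 0" sets every field to
// zero and assembles to 0x00000000, the word MIPS defines as nop, which is
// why expandPseudos can emit SLL for Mips::NOP.
static uint32_t encodeSLL(unsigned Rd, unsigned Rt, unsigned Shamt) {
  return (0u << 26) | (0u << 21) | (Rt << 16) | (Rd << 11) | (Shamt << 6) | 0u;
}

int main() {
  assert(encodeSLL(0, 0, 0) == 0x00000000u);
  return 0;
}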
/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
/// code to the specified MCE object.
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 041a9d0..d62b166 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,4 +1,4 @@
-//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===//
+//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with useful instructions.
+// Simple pass to fill delay slots with useful instructions.
//
//===----------------------------------------------------------------------===//
@@ -15,7 +15,7 @@
#include "Mips.h"
#include "MipsTargetMachine.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -33,8 +33,7 @@ STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
static cl::opt<bool> DisableDelaySlotFiller(
"disable-mips-delay-filler",
cl::init(false),
- cl::desc("Disable the delay slot filler, which attempts to fill the Mips"
- "delay slots with useful instructions."),
+ cl::desc("Fill all delay slots with NOPs."),
cl::Hidden);
// This option can be used to silence complaints by machine verifier passes.
@@ -45,15 +44,25 @@ static cl::opt<bool> SkipDelaySlotFiller(
cl::Hidden);
namespace {
- struct Filler : public MachineFunctionPass {
- typedef MachineBasicBlock::instr_iterator InstrIter;
- typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
+ class RegDefsUses {
+ public:
+ RegDefsUses(TargetMachine &TM);
+ void init(const MachineInstr &MI);
+ bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- InstrIter LastFiller;
+ private:
+ bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
+ bool IsDef) const;
- static char ID;
+ /// Returns true if Reg or its alias is in RegSet.
+ bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
+
+ const TargetRegisterInfo &TRI;
+ BitVector Defs, Uses;
+ };
+
+ class Filler : public MachineFunctionPass {
+ public:
Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
@@ -61,7 +70,6 @@ namespace {
return "Mips Delay Slot Filler";
}
- bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) {
if (SkipDelaySlotFiller)
return false;
@@ -73,66 +81,115 @@ namespace {
return Changed;
}
- bool isDelayFiller(MachineBasicBlock &MBB,
- InstrIter candidate);
+ private:
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
- void insertCallUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
-
- void insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
- bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
- unsigned Reg);
+ /// This function checks if it is valid to move Candidate to the delay slot
+ /// and returns true if it isn't. It also updates load and store flags and
+ /// register defs and uses.
+ bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad,
+ bool &SawStore, RegDefsUses &RegDU) const;
- bool delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses);
+ bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const;
- bool
- findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
- InstrIter &Filler);
+ bool terminateSearch(const MachineInstr &Candidate) const;
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ static char ID;
};
char Filler::ID = 0;
} // end of anonymous namespace
+RegDefsUses::RegDefsUses(TargetMachine &TM)
+ : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false),
+ Uses(TRI.getNumRegs(), false) {}
+
+void RegDefsUses::init(const MachineInstr &MI) {
+ // Add all register operands which are explicit and non-variadic.
+ update(MI, 0, MI.getDesc().getNumOperands());
+
+ // If MI is a call, add RA to Defs to prevent users of RA from going into
+ // delay slot.
+ if (MI.isCall())
+ Defs.set(Mips::RA);
+
+ // Add all implicit register operands of branch instructions except
+ // register AT.
+ if (MI.isBranch()) {
+ update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
+ Defs.reset(Mips::AT);
+ }
+}
+
+bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
+ BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
+ bool HasHazard = false;
+
+ for (unsigned I = Begin; I != End; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+
+ if (MO.isReg() && MO.getReg())
+ HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef());
+ }
+
+ Defs |= NewDefs;
+ Uses |= NewUses;
+
+ return HasHazard;
+}
+
+bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
+ unsigned Reg, bool IsDef) const {
+ if (IsDef) {
+ NewDefs.set(Reg);
+ // check whether Reg has already been defined or used.
+ return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
+ }
+
+ NewUses.set(Reg);
+ // check whether Reg has already been defined.
+ return isRegInSet(Defs, Reg);
+}
+
+bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
+ // Check Reg and all aliased Registers.
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ if (RegSet.test(*AI))
+ return true;
+ return false;
+}
+
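The bookkeeping above reduces to two rules: a candidate instruction may not define a register that anything scanned so far (starting with the instruction owning the delay slot) has already defined or used, and may not use a register that has already been defined. A toy restatement of just that rule, with register aliases ignored and the new bits merged immediately rather than per instruction (standalone C++ sketch; ToyRegDefsUses is made up, not the patch's class):

#include <bitset>

struct ToyRegDefsUses {
  std::bitset<64> Defs, Uses; // one bit per register number

  // Roughly mirrors checkRegDefsUses: record the operand and report a hazard
  // if it conflicts with what has been accumulated so far.
  bool update(unsigned Reg, bool IsDef) {
    bool Hazard = IsDef ? (Defs.test(Reg) || Uses.test(Reg)) : Defs.test(Reg);
    (IsDef ? Defs : Uses).set(Reg);
    return Hazard;
  }
};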
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
-bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- LastFiller = MBB.instr_end();
-
- for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
- if (I->hasDelaySlot()) {
- ++FilledSlots;
- Changed = true;
- InstrIter InstrWithSlot = I;
- InstrIter D;
-
- // Delay slot filling is disabled at -O0.
- if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
- findDelayInstr(MBB, I, D)) {
- MBB.splice(llvm::next(I), &MBB, D);
- ++UsefulSlots;
- } else
- BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
-
- // Record the filler instruction that filled the delay slot.
- // The instruction after it will be visited in the next iteration.
- LastFiller = ++I;
-
- // Bundle the delay slot filler to InstrWithSlot so that the machine
- // verifier doesn't expect this instruction to be a terminator.
- MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller));
- }
- return Changed;
+ for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!I->hasDelaySlot())
+ continue;
+
+ ++FilledSlots;
+ Changed = true;
+ Iter D;
+
+ // Delay slot filling is disabled at -O0.
+ if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) &&
+ findDelayInstr(MBB, I, D)) {
+ MBB.splice(llvm::next(I), &MBB, D);
+ ++UsefulSlots;
+ } else
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
+
+ // Bundle the delay slot filler to the instruction with the delay slot.
+ MIBundleBuilder(MBB, I, llvm::next(llvm::next(I)));
+ }
+
+ return Changed;
}
/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -141,146 +198,57 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
return new Filler(tm);
}
-bool Filler::findDelayInstr(MachineBasicBlock &MBB,
- InstrIter slot,
- InstrIter &Filler) {
- SmallSet<unsigned, 32> RegDefs;
- SmallSet<unsigned, 32> RegUses;
+bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot,
+ Iter &Filler) const {
+ RegDefsUses RegDU(TM);
- insertDefsUses(slot, RegDefs, RegUses);
+ RegDU.init(*Slot);
- bool sawLoad = false;
- bool sawStore = false;
+ bool SawLoad = false;
+ bool SawStore = false;
- for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
+ for (ReverseIter I(Slot); I != MBB.rend(); ++I) {
// skip debug value
if (I->isDebugValue())
continue;
- // Convert to forward iterator.
- InstrIter FI(llvm::next(I).base());
-
- if (I->hasUnmodeledSideEffects()
- || I->isInlineAsm()
- || I->isLabel()
- || FI == LastFiller
- || I->isPseudo()
- //
- // Should not allow:
- // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
- // list. TBD.
- )
+ if (terminateSearch(*I))
break;
- if (delayHasHazard(FI, sawLoad, sawStore, RegDefs, RegUses)) {
- insertDefsUses(FI, RegDefs, RegUses);
+ if (delayHasHazard(*I, SawLoad, SawStore, RegDU))
continue;
- }
- Filler = FI;
+ Filler = llvm::next(I).base();
return true;
}
return false;
}
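findDelayInstr hands the result back through llvm::next(I).base(), the usual way to convert a reverse iterator into a forward iterator that designates the same element. A small standalone illustration of that relationship using the standard library (std::next stands in for llvm::next, its pre-C++11 equivalent):

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> V = {10, 20, 30, 40};

  // Reverse iterator designating the element with value 30.
  std::vector<int>::reverse_iterator RI = std::find(V.rbegin(), V.rend(), 30);

  // A reverse iterator's base() points one past the element it designates, so
  // advancing the reverse iterator first and then taking base() yields a
  // forward iterator to that same element.
  std::vector<int>::iterator FI = std::next(RI).base();
  assert(&*FI == &*RI && *FI == 30);
  return 0;
}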
-bool Filler::delayHasHazard(InstrIter candidate,
- bool &sawLoad, bool &sawStore,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- if (candidate->isImplicitDef() || candidate->isKill())
- return true;
+bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad,
+ bool &SawStore, RegDefsUses &RegDU) const {
+ bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill());
// Loads or stores cannot be moved past a store to the delay slot
// and stores cannot be moved past a load.
- if (candidate->mayLoad()) {
- if (sawStore)
- return true;
- sawLoad = true;
- }
-
- if (candidate->mayStore()) {
- if (sawStore)
- return true;
- sawStore = true;
- if (sawLoad)
- return true;
+ if (Candidate.mayStore() || Candidate.hasOrderedMemoryRef()) {
+ HasHazard |= SawStore | SawLoad;
+ SawStore = true;
+ } else if (Candidate.mayLoad()) {
+ HasHazard |= SawStore;
+ SawLoad = true;
}
- assert((!candidate->isCall() && !candidate->isReturn()) &&
+ assert((!Candidate.isCall() && !Candidate.isReturn()) &&
"Cannot put calls or returns in delay slot.");
- for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
- const MachineOperand &MO = candidate->getOperand(i);
- unsigned Reg;
+ HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue; // skip
-
- if (MO.isDef()) {
- // check whether Reg is defined or used before delay slot.
- if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
- return true;
- }
- if (MO.isUse()) {
- // check whether Reg is defined before delay slot.
- if (IsRegInSet(RegDefs, Reg))
- return true;
- }
- }
- return false;
-}
-
-// Helper function for getting a MachineOperand's register number and adding it
-// to RegDefs or RegUses.
-static void insertDefUse(const MachineOperand &MO,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses,
- unsigned ExcludedReg = 0) {
- unsigned Reg;
-
- if (!MO.isReg() || !(Reg = MO.getReg()) || (Reg == ExcludedReg))
- return;
-
- if (MO.isDef())
- RegDefs.insert(Reg);
- else if (MO.isUse())
- RegUses.insert(Reg);
-}
-
-// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(InstrIter MI,
- SmallSet<unsigned, 32> &RegDefs,
- SmallSet<unsigned, 32> &RegUses) {
- unsigned I, E = MI->getDesc().getNumOperands();
-
- for (I = 0; I != E; ++I)
- insertDefUse(MI->getOperand(I), RegDefs, RegUses);
-
- // If MI is a call, add RA to RegDefs to prevent users of RA from going into
- // delay slot.
- if (MI->isCall()) {
- RegDefs.insert(Mips::RA);
- return;
- }
-
- // Return if MI is a return.
- if (MI->isReturn())
- return;
-
- // Examine the implicit operands. Exclude register AT which is in the list of
- // clobbered registers of branch instructions.
- E = MI->getNumOperands();
- for (; I != E; ++I)
- insertDefUse(MI->getOperand(I), RegDefs, RegUses, Mips::AT);
+ return HasHazard;
}
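The load/store flags in delayHasHazard implement a simple ordering rule: scanning backwards from the branch, a store (or any ordered memory access) conflicts with any load or store already seen, while a plain load only conflicts with a store already seen. A compact restatement of that rule (illustrative C++ only; memHazard is a made-up name):

// SawLoad/SawStore accumulate what lies between the candidate and the delay
// slot; the return value says whether the candidate must stay where it is.
static bool memHazard(bool MayLoad, bool MayStoreOrOrdered,
                      bool &SawLoad, bool &SawStore) {
  bool Hazard = false;
  if (MayStoreOrOrdered) {
    Hazard = SawStore || SawLoad;
    SawStore = true;
  } else if (MayLoad) {
    Hazard = SawStore;
    SawLoad = true;
  }
  return Hazard;
}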
-//returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
- // Check Reg and all aliased Registers.
- for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
- AI.isValid(); ++AI)
- if (RegSet.count(*AI))
- return true;
- return false;
+bool Filler::terminateSearch(const MachineInstr &Candidate) const {
+ return (Candidate.isTerminator() || Candidate.isCall() ||
+ Candidate.isLabel() || Candidate.isInlineAsm() ||
+ Candidate.hasUnmodeledSideEffects());
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index c5f1290..78c74ef 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -96,7 +96,14 @@ private:
SDNode *Select(SDNode *N);
// Complex Pattern.
- bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
+ /// (reg + imm).
+ bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Alias);
@@ -323,8 +330,8 @@ SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
-bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
+bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
EVT ValTy = Addr.getValueType();
// if Address is FI, get the TargetFrameIndex.
@@ -384,21 +391,24 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
return true;
}
}
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
return true;
}
+bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
if (Parent) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index a309040..36e1a15 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -11,8 +11,8 @@
// selection DAG.
//
//===----------------------------------------------------------------------===//
-
#define DEBUG_TYPE "mips-lower"
+#include <set>
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -55,6 +55,12 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden,
cl::desc("MIPS: mips16 hard float enable."),
cl::init(false));
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+    cl::desc("Don't expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
static const uint16_t O32IntRegs[4] = {
@@ -162,6 +168,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
@@ -205,39 +212,56 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+namespace {
+ struct ltstr {
+ bool operator()(const char *s1, const char *s2) const
+ {
+ return strcmp(s1, s2) < 0;
+ }
+ };
+
+ std::set<const char*, ltstr> noHelperNeeded;
+}
+
+void MipsTargetLowering::SetMips16LibcallName
+ (RTLIB::Libcall l, const char *Name) {
+ setLibcallName(l, Name);
+ noHelperNeeded.insert(Name);
+}
+
void MipsTargetLowering::setMips16HardFloatLibCalls() {
- setLibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
- setLibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
- setLibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
- setLibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
- setLibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
- setLibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
- setLibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
- setLibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
- setLibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
- setLibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
- setLibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
- setLibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
- setLibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
- setLibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
- setLibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
- setLibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
- setLibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
- setLibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
- setLibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
- setLibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
- setLibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
- setLibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
- setLibcallName(RTLIB::O_F32, "__mips16_unordsf2");
- setLibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+ SetMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ SetMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ SetMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ SetMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ SetMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ SetMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ SetMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ SetMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ SetMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ SetMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ SetMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ SetMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ SetMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ SetMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ SetMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ SetMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ SetMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ SetMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ SetMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ SetMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ SetMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ SetMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ SetMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ SetMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ SetMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ SetMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ SetMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ SetMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ SetMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ SetMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ SetMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ SetMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
}
MipsTargetLowering::
@@ -404,6 +428,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
@@ -426,6 +452,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
@@ -498,7 +526,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
- maxStoresPerMemcpy = 16;
+ MaxStoresPerMemcpy = 16;
}
bool
@@ -1026,6 +1054,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
@@ -1207,11 +1236,290 @@ MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
return Sink;
}
+MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
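At the source level, the diamond built by EmitSel16 is nothing more than a conditional select; for SelBeqZ the intended semantics are roughly the following (C-level sketch only, selectOnZero and its parameter names are hypothetical). The conditional branch in thisMBB jumps straight to sinkMBB with the first incoming value live, and the empty copy0MBB exists only to give the PHI in sinkMBB its second incoming edge.

// Pick one value when the condition register is zero, the other otherwise.
static int selectOnZero(int Cond, int ValIfZero, int ValOtherwise) {
  return (Cond == 0) ? ValIfZero : ValOtherwise;
}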
+MachineBasicBlock *MipsTargetLowering::EmitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+
+MachineBasicBlock *MipsTargetLowering::EmitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
+
+
+MachineBasicBlock
+ *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+
+MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+
+static unsigned Mips16WhichOp8uOr16simm
+ (unsigned shortOp, unsigned longOp, int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
+
+MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB )const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+
+}
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
case Mips::ATOMIC_LOAD_ADD_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
@@ -1317,6 +1625,75 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicCmpSwap(MI, BB, 8);
case Mips::BPOSGE32_PSEUDO:
return EmitBPOSGE32(MI, BB);
+ case Mips::SelBeqZ:
+ return EmitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return EmitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return EmitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return EmitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return EmitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return EmitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return EmitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return EmitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return EmitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return EmitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return EmitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ break;
+ case Mips::SltCCRxRy16:
+ return EmitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ break;
+ case Mips::SltiCCRxImmX16:
+ return EmitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return EmitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return EmitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
}
}
@@ -2209,6 +2586,34 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT);
}
+// An EH_RETURN is the result of lowering llvm.eh.return which in turn is
+// generated from __builtin_eh_return (offset, handler)
+// The effect of this is to adjust the stack pointer by "offset"
+// and then branch to "handler".
+SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setCallsEhReturn();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+
+ // Store stack offset in V1, store jump target in V0. Glue CopyToReg and
+ // EH_RETURN nodes, so that instructions are emitted back-to-back.
+ unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = IsN64 ? Mips::V0_64 : Mips::V0;
+ Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
+ Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
+ return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
+ DAG.getRegister(OffsetReg, Ty),
+ DAG.getRegister(AddrReg, getPointerTy()),
+ Chain.getValue(1));
+}
+
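For context, the node lowered above originates from unwinder-style code along these lines; a hedged sketch only: resume_unwind and its arguments are made up, and __builtin_eh_return is the Clang/GCC builtin that, per the comment above, reaches the backend as llvm.eh.return.

#include <stddef.h>

// Pop 'adjustment' bytes off the caller's stack and resume at 'handler'.
// LowerEH_RETURN turns the resulting intrinsic into the V1 (offset) and
// V0 (handler) copies glued to a MipsISD::EH_RETURN node.
void resume_unwind(ptrdiff_t adjustment, void *handler) {
  __builtin_eh_return(adjustment, handler); // does not return
}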
// TODO: set SType according to the desired memory barrier behavior.
SDValue
MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
@@ -2754,6 +3159,163 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or return values placed in floating point
+// registers, we use a set of helper functions. (This includes functions whose
+// return type is complex, which on Mips is returned in a pair of floating
+// point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In the traditional Mips O32 and N32 ABIs, floating point numbers are passed
+// in floating point argument registers 1,2 only when the first and optionally
+// the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating point
+// arguments are being passed in floating point registers by the ABI, because
+// Mips16 mode code cannot execute floating point instructions to load those
+// values and hence helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df)
+// the helper function suffixes for these are:
+// 0, 1, 5, 9, 2, 6, 10
+// this suffix can then be calculated as follows:
+// for a given argument Arg:
+//   Arg1x, Arg2x = 1 : Arg is sf
+//                  2 : Arg is df
+//                  0 : Arg is neither sf nor df
+// So this stub is the string for number Arg1x + Arg2x*4.
+// However, not all numbers between 0 and 10 are possible; we check anyway and
+// assert if an impossible value shows up.
+//
+
+unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >=2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
+//
+// Prefixes are attached to stub numbers depending on the return type:
+// return type: float           sf_
+//              double          df_
+//              single complex  sc_
+//              double complex  dc_
+//              others          NO PREFIX
+//
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return-type-dependent prefix + stub number.
+//
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script to generate these mapping tables. This will do
+// for now. There is a whole series of helper function mapping arrays, one
+// for each return type class as outlined above. There are 11 possible
+// entries. Entries that are 0 should never be selected.
+//
+// All the arrays are similar except for the one for functions which return
+// neither sf, df, sc, nor dc; for those we only care about cases which have
+// sf or df as the first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char* MipsTargetLowering::
+ getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy ->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // check if it's complex
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
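Tying the stub-number arithmetic and the prefix tables together, a worked example (standalone sketch; mips16HelperName and the callee signature are hypothetical): for a callee double f(double, float), the first argument contributes 2, the second contributes 1*4, so the stub number is 6, and the double return type selects the df_ prefix, giving __mips16_call_stub_df_6.

#include <cassert>
#include <string>

// Illustrative only: recomputes the name the tables above would hand back.
static std::string mips16HelperName(unsigned Arg1x, unsigned Arg2x,
                                    const std::string &RetPrefix) {
  unsigned StubNum = Arg1x + Arg2x * 4; // sf = 1, df = 2, anything else = 0
  return "__mips16_call_stub_" + RetPrefix + std::to_string(StubNum);
}

int main() {
  // double f(double, float): Arg1x = 2 (df), Arg2x = 1 (sf), df_ return prefix.
  assert(mips16HelperName(2, 1, "df_") == "__mips16_call_stub_df_6");
  return 0;
}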
/// LowerCall - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
@@ -2770,6 +3332,26 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ const char* mips16HelperFunction = 0;
+ bool needMips16Helper = false;
+
+ if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat &&
+ Mips16HardFloat) {
+ //
+    // Currently we don't have symbols tagged with the mips16 or mips32
+    // qualifier, so we will assume that we don't know what kind it is
+    // and generate the helper.
+ //
+ bool lookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (noHelperNeeded.find(S->getSymbol()) != noHelperNeeded.end()) {
+ lookupHelper = false;
+ }
+ }
+ if (lookupHelper) mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, needMips16Helper);
+
+ }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
@@ -2779,9 +3361,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
- MipsCCInfo.analyzeCallOperands(Outs);
+ MipsCCInfo.analyzeCallOperands(Outs, isVarArg);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
@@ -2810,7 +3392,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getPointerTy());
// With EABI is it possible to have 16 args on registers.
- SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ std::deque< std::pair<unsigned, SDValue> > RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin();
@@ -2920,31 +3502,31 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16);
- else if (HasMips64)
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP);
- else // O32 & PIC
+ else // N64 || PIC
Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
GlobalOrExternal = true;
}
- SDValue InFlag;
-
- // T9 register operand.
- SDValue T9;
+ SDValue JumpTarget = Callee;
// T9 should contain the address of the callee function if
  // -relocation-model=pic or it is an indirect call.
if (IsPICCall || !GlobalOrExternal) {
- // copy to T9
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
- InFlag = Chain.getValue(1);
+ unsigned V0Reg = Mips::V0;
+ if (needMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(
+ mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ }
+ else {
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
- if (Subtarget->inMips16Mode())
- T9 = DAG.getRegister(T9Reg, getPointerTy());
- else
- Callee = DAG.getRegister(T9Reg, getPointerTy());
+ if (!Subtarget->inMips16Mode())
+ JumpTarget = SDValue();
+ }
}
// Insert node "GP copy globalreg" before call to function.
@@ -2962,6 +3544,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
// stuck together.
+ SDValue InFlag;
+
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
@@ -2973,9 +3557,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
//
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
+ SmallVector<SDValue, 8> Ops(1, Chain);
+
+ if (JumpTarget.getNode())
+ Ops.push_back(JumpTarget);
// Add argument registers to the end of the list so that they are
// known live into the call.
@@ -2983,10 +3568,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Add T9 register operand.
- if (T9.getNode())
- Ops.push_back(T9);
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
@@ -3065,7 +3646,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
MipsCCInfo.analyzeFormalArguments(Ins);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
@@ -3225,15 +3806,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
  // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -3242,9 +3816,9 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
- // guarantee that all emitted copies are
- // stuck together, avoiding something bad
+ // Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// The Mips ABIs for returning structs by value require that we copy
@@ -3263,15 +3837,17 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag);
Flag = Chain.getValue(1);
- MF.getRegInfo().addLiveOut(V0);
+ RetOps.push_back(DAG.getRegister(V0, getPointerTy()));
}
- // Return on Mips is always a "jr $ra"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag);
+ RetOps.push_back(Flag);
- // Return Void
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain);
+ // Return on Mips is always a "jr $ra"
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other, &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
@@ -3552,40 +4128,21 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const {
return TargetLowering::getJumpTableEncoding();
}
-MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg,
- bool IsO32, CCState &Info) : CCInfo(Info) {
- UseRegsForByval = true;
-
- if (IsO32) {
- RegSize = 4;
- NumIntArgRegs = array_lengthof(O32IntRegs);
- ReservedArgArea = 16;
- IntArgRegs = ShadowRegs = O32IntRegs;
- FixedFn = VarFn = CC_MipsO32;
- } else {
- RegSize = 8;
- NumIntArgRegs = array_lengthof(Mips64IntRegs);
- ReservedArgArea = 0;
- IntArgRegs = Mips64IntRegs;
- ShadowRegs = Mips64DPRegs;
- FixedFn = CC_MipsN;
- VarFn = CC_MipsN_VarArg;
- }
-
- if (CallConv == CallingConv::Fast) {
- assert(!IsVarArg);
- UseRegsForByval = false;
- ReservedArgArea = 0;
- FixedFn = VarFn = CC_Mips_FastCC;
- }
-
+MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_,
+ CCState &Info)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_) {
// Pre-allocate reserved argument area.
- CCInfo.AllocateStack(ReservedArgArea, 1);
+ CCInfo.AllocateStack(reservedArgArea(), 1);
}
void MipsTargetLowering::MipsCC::
-analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
+analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
+ bool IsVarArg) {
+ assert((CallConv != CallingConv::Fast || !IsVarArg) &&
+ "CallingConv::Fast shouldn't be used for vararg functions.");
+
unsigned NumOpnds = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn(), *VarFn = varArgFn();
for (unsigned I = 0; I != NumOpnds; ++I) {
MVT ArgVT = Args[I].VT;
@@ -3597,10 +4154,10 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
continue;
}
- if (Args[I].IsFixed)
- R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- else
+ if (IsVarArg && !Args[I].IsFixed)
R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else
+ R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
if (R) {
#ifndef NDEBUG
@@ -3615,6 +4172,7 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) {
void MipsTargetLowering::MipsCC::
analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) {
unsigned NumArgs = Args.size();
+ llvm::CCAssignFn *FixedFn = fixedArgFn();
for (unsigned I = 0; I != NumArgs; ++I) {
MVT ArgVT = Args[I].VT;
@@ -3644,11 +4202,12 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
struct ByValArgInfo ByVal;
+ unsigned RegSize = regSize();
unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize);
unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize),
RegSize * 2);
- if (UseRegsForByval)
+ if (useRegsForByval())
allocateRegs(ByVal, ByValSize, Align);
// Allocate space on caller's stack.
@@ -3659,9 +4218,38 @@ MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
ByValArgs.push_back(ByVal);
}
+unsigned MipsTargetLowering::MipsCC::numIntArgRegs() const {
+ return IsO32 ? array_lengthof(O32IntRegs) : array_lengthof(Mips64IntRegs);
+}
+
+unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
+ return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+ return IsO32 ? O32IntRegs : Mips64IntRegs;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
+ if (CallConv == CallingConv::Fast)
+ return CC_Mips_FastCC;
+
+ return IsO32 ? CC_MipsO32 : CC_MipsN;
+}
+
+llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
+ return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+}
+
+const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+ return IsO32 ? O32IntRegs : Mips64DPRegs;
+}
+
void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
unsigned ByValSize,
unsigned Align) {
+ unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
+ const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
"Byval argument's size and alignment should be a multiple of"
"RegSize.");
@@ -3726,7 +4314,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains,
// Copy byVal arg to registers and stack.
void MipsTargetLowering::
passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index c4b38c6..f0f3782 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -20,6 +20,8 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
+#include <deque>
+#include <string>
namespace llvm {
namespace MipsISD {
@@ -63,6 +65,8 @@ namespace llvm {
// Return
Ret,
+ EH_RETURN,
+
// MAdd/Sub nodes
MAdd,
MAddu,
@@ -174,8 +178,16 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
+ void SetMips16LibcallName(RTLIB::Libcall, const char *Name);
+
void setMips16HardFloatLibCalls();
+ unsigned int getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+ const char *getMips16HelperFunction(Type *RetTy, ArgListTy &Args,
+                                     bool &needHelper) const;
+
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
unsigned FirstIdx; // Index of the first register used.
@@ -189,53 +201,57 @@ namespace llvm {
/// arguments and inquire about calling convention information.
class MipsCC {
public:
- MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32,
- CCState &Info);
+ MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info);
- void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs);
+ void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ bool IsVarArg);
void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins);
- void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags);
-
const CCState &getCCInfo() const { return CCInfo; }
/// hasByValArg - Returns true if function has byval arguments.
bool hasByValArg() const { return !ByValArgs.empty(); }
- /// useRegsForByval - Returns true if the calling convention allows the
- /// use of registers to pass byval arguments.
- bool useRegsForByval() const { return UseRegsForByval; }
-
/// regSize - Size (in number of bits) of integer registers.
- unsigned regSize() const { return RegSize; }
+ unsigned regSize() const { return IsO32 ? 4 : 8; }
/// numIntArgRegs - Number of integer registers available for calls.
- unsigned numIntArgRegs() const { return NumIntArgRegs; }
+ unsigned numIntArgRegs() const;
/// reservedArgArea - The size of the area the caller reserves for
/// register arguments. This is 16 bytes if the ABI is O32.
- unsigned reservedArgArea() const { return ReservedArgArea; }
+ unsigned reservedArgArea() const;
- /// intArgRegs - Pointer to array of integer registers.
- const uint16_t *intArgRegs() const { return IntArgRegs; }
+ /// Return pointer to array of integer argument registers.
+ const uint16_t *intArgRegs() const;
typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator;
byval_iterator byval_begin() const { return ByValArgs.begin(); }
byval_iterator byval_end() const { return ByValArgs.end(); }
private:
+ void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags);
+
+ /// useRegsForByval - Returns true if the calling convention allows the
+ /// use of registers to pass byval arguments.
+ bool useRegsForByval() const { return CallConv != CallingConv::Fast; }
+
+ /// Return the function that analyzes fixed argument list functions.
+ llvm::CCAssignFn *fixedArgFn() const;
+
+ /// Return the function that analyzes variable argument list functions.
+ llvm::CCAssignFn *varArgFn() const;
+
+ const uint16_t *shadowRegs() const;
+
void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
unsigned Align);
CCState &CCInfo;
- bool UseRegsForByval;
- unsigned RegSize;
- unsigned NumIntArgRegs;
- unsigned ReservedArgArea;
- const uint16_t *IntArgRegs, *ShadowRegs;
+ CallingConv::ID CallConv;
+ bool IsO32;
SmallVector<ByValArgInfo, 2> ByValArgs;
- llvm::CCAssignFn *FixedFn, *VarFn;
};
// Subtarget Info
@@ -265,6 +281,7 @@ namespace llvm {
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
@@ -294,7 +311,7 @@ namespace llvm {
/// passByValArg - Pass a byval argument in registers or on stack.
void passByValArg(SDValue Chain, DebugLoc DL,
- SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
@@ -387,6 +404,28 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
};
}
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index ab6f8ab..891bdc1 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -107,7 +107,8 @@ multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"),
- [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+ [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>,
+ NeverHasSideEffects;
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
@@ -138,17 +139,27 @@ class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC,
InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>;
+
+class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
+ [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
+
class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode addr:$addr))], Itin, FrmFI> {
+ [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
}
class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin,
Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, addr:$addr)], Itin, FrmFI> {
+ [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
}
@@ -169,13 +180,17 @@ class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
!strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI>;
+ [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
!strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI>;
+ [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ let AddedComplexity = 20;
+}
class BC1F_FT<string opstr, InstrItinClass Itin,
SDPatternOperator Op = null_frag> :
@@ -203,15 +218,13 @@ def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>;
def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>;
def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>;
def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>;
-def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>,
- NeverHasSideEffects;
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>;
defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>;
defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>;
defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
-defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>,
- NeverHasSideEffects;
+defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>;
@@ -228,19 +241,16 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
}
def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>;
-def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>,
- NeverHasSideEffects;
-def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>,
- NeverHasSideEffects;
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>;
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>;
-let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>;
def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>;
}
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64",
- neverHasSideEffects = 1 in {
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>;
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>;
def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>;
@@ -265,8 +275,8 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
// regardless of register aliasing.
/// Move Control Registers From/To CPU Registers
-def CFC1 : MFC1_FT<"cfc1", CPURegs, CCR, IIFmove>, MFC1_FM<2>;
-def CTC1 : MTC1_FT<"ctc1", CCR, CPURegs, IIFmove>, MFC1_FM<6>;
+def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>;
+def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>;
def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>;
def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>;
def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>;
@@ -437,7 +447,7 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
//===----------------------------------------------------------------------===//
-def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), []>;
+def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>;
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
@@ -492,3 +502,33 @@ let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
+
+// Load/Store patterns.
+let AddedComplexity = 40 in {
+ let Predicates = [IsN64, HasStdEnc] in {
+ def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>;
+ def : MipsPat<(store FGR32:$v, addrRegImm:$a),
+ (SWC1_P8 FGR32:$v, addrRegImm:$a)>;
+ def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>;
+ def : MipsPat<(store FGR64:$v, addrRegImm:$a),
+ (SDC164_P8 FGR64:$v, addrRegImm:$a)>;
+ }
+
+ let Predicates = [NotN64, HasStdEnc] in {
+ def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>;
+ def : MipsPat<(store FGR32:$v, addrRegImm:$a),
+ (SWC1 FGR32:$v, addrRegImm:$a)>;
+ }
+
+ let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>;
+ def : MipsPat<(store FGR64:$v, addrRegImm:$a),
+ (SDC164 FGR64:$v, addrRegImm:$a)>;
+ }
+
+ let Predicates = [NotN64, NotMips64, HasStdEnc] in {
+ def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>;
+ def : MipsPat<(store AFGR64:$v, addrRegImm:$a),
+ (SDC1 AFGR64:$v, addrRegImm:$a)>;
+ }
+}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index c026b5d..ee432c8 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -366,13 +366,8 @@ class LUI_FM {
let Inst{15-0} = imm16;
}
-class NOP_FM {
- bits<32> Inst;
-
- let Inst{31-0} = 0;
-}
-
class JALR_FM {
+ bits<5> rd;
bits<5> rs;
bits<32> Inst;
@@ -380,7 +375,7 @@ class JALR_FM {
let Inst{31-26} = 0;
let Inst{25-21} = rs;
let Inst{20-16} = 0;
- let Inst{15-11} = 31;
+ let Inst{15-11} = rd;
let Inst{10-6} = 0;
let Inst{5-0} = 9;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8f2ce6f..de09c9e 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -72,7 +72,8 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>;
def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
// Return
-def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
@@ -232,6 +233,10 @@ def calltarget64: Operand<i64>;
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
+
+def simm20 : Operand<i32> {
+}
+
def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
@@ -296,6 +301,10 @@ def HI16 : SDNodeXForm<imm, [{
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
+def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 15-bit sign extended on target immediate.
@@ -325,19 +334,25 @@ def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions from the stack use it.
def addr :
- ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrRegImm :
+ ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrDefault :
+ ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
// Arithmetic and logical instructions with 3 register operands.
-class ArithLogicR<string opstr, RegisterClass RC, bit isComm = 0,
+class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
InstrItinClass Itin = NoItinerary,
SDPatternOperator OpNode = null_frag>:
- InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
+ InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], Itin, FrmR> {
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
let isCommutable = isComm;
let isReMaterializable = 1;
string BaseOpcode;
@@ -345,27 +360,27 @@ class ArithLogicR<string opstr, RegisterClass RC, bit isComm = 0,
}
// Arithmetic and logical instructions with 2 register operands.
-class ArithLogicI<string opstr, Operand Od, RegisterClass RC,
+class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
SDPatternOperator imm_type = null_frag,
SDPatternOperator OpNode = null_frag> :
- InstSE<(outs RC:$rt), (ins RC:$rs, Od:$imm16),
+ InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
!strconcat(opstr, "\t$rt, $rs, $imm16"),
- [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu, FrmI> {
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
let isReMaterializable = 1;
}
// Arithmetic Multiply ADD/SUB
class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> :
- InstSE<(outs), (ins CPURegs:$rs, CPURegs:$rt),
+ InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt),
!strconcat(opstr, "\t$rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul, FrmR> {
+ [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> {
let Defs = [HI, LO];
let Uses = [HI, LO];
let isCommutable = isComm;
}
// Logical
-class LogicNOR<string opstr, RegisterClass RC>:
+class LogicNOR<string opstr, RegisterOperand RC>:
InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
[(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
@@ -374,17 +389,17 @@ class LogicNOR<string opstr, RegisterClass RC>:
// Shifts
class shift_rotate_imm<string opstr, Operand ImmOpnd,
- RegisterClass RC, SDPatternOperator OpNode = null_frag,
+ RegisterOperand RC, SDPatternOperator OpNode = null_frag,
SDPatternOperator PF = null_frag> :
InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
!strconcat(opstr, "\t$rd, $rt, $shamt"),
[(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
-class shift_rotate_reg<string opstr, RegisterClass RC,
+class shift_rotate_reg<string opstr, RegisterOperand RC,
SDPatternOperator OpNode = null_frag>:
- InstSE<(outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
+ InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rt, $rs"),
- [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu, FrmR>;
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
// Load Upper Immediate
class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
@@ -498,15 +513,16 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
// SetCC
class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
- InstSE<(outs CPURegs:$rd), (ins RC:$rs, RC:$rt),
+ InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
RegisterClass RC>:
- InstSE<(outs CPURegs:$rt), (ins RC:$rs, Od:$imm16),
+ InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
!strconcat(opstr, "\t$rt, $rs, $imm16"),
- [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>;
+ [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
+ IIAlu, FrmI>;
// Jump
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
@@ -559,12 +575,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
let DecoderMethod = "DecodeJumpTarget";
}
+ class JumpLinkRegPseudo<RegisterClass RC, Instruction JALRInst,
+ Register RetReg>:
+ PseudoSE<(outs), (ins RC:$rs), [(MipsJmpLink RC:$rs)], IIBranch>,
+ PseudoInstExpansion<(JALRInst RetReg, RC:$rs)>;
+
class JumpLinkReg<string opstr, RegisterClass RC>:
- InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"),
- [(MipsJmpLink RC:$rs)], IIBranch, FrmR>;
+ InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [], IIBranch, FrmR>;
- class BGEZAL_FT<string opstr, RegisterClass RC> :
- InstSE<(outs), (ins RC:$rs, brtarget:$offset),
+ class BGEZAL_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, brtarget:$offset),
!strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
}
@@ -585,19 +606,19 @@ class SYNC_FT :
NoItinerary, FrmOther>;
// Mul, Div
-class Mult<string opstr, InstrItinClass itin, RegisterClass RC,
+class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
- InstSE<(outs), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
+ InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
itin, FrmR> {
let isCommutable = 1;
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class Div<SDNode op, string opstr, InstrItinClass itin, RegisterClass RC,
+class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
- InstSE<(outs), (ins RC:$rs, RC:$rt),
- !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RC:$rs, RC:$rt)], itin,
+ InstSE<(outs), (ins RO:$rs, RO:$rt),
+ !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin,
FrmR> {
let Defs = DefRegs;
}
@@ -623,14 +644,14 @@ class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> :
}
// Count Leading Ones/Zeros in Word
-class CountLeading0<string opstr, RegisterClass RC>:
- InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz RC:$rs))], IIAlu, FrmR>,
+class CountLeading0<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>,
Requires<[HasBitCount, HasStdEnc]>;
-class CountLeading1<string opstr, RegisterClass RC>:
- InstSE<(outs RC:$rd), (ins RC:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu, FrmR>,
+class CountLeading1<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>,
Requires<[HasBitCount, HasStdEnc]>;
@@ -642,31 +663,31 @@ class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> :
}
// Subword Swap
-class SubwordSwap<string opstr, RegisterClass RC>:
- InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
+class SubwordSwap<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
NoItinerary, FrmR> {
let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> :
- InstSE<(outs CPURegClass:$rt), (ins HWRegClass:$rd), "rdhwr\t$rt, $rd", [],
+class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> :
+ InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [],
IIAlu, FrmR>;
// Ext and Ins
-class ExtBase<string opstr, RegisterClass RC>:
- InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$size),
+class ExtBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$size))], NoItinerary,
+ [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
FrmR> {
let Predicates = [HasMips32r2, HasStdEnc];
}
-class InsBase<string opstr, RegisterClass RC>:
- InstSE<(outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ins:$size, RC:$src),
+class InsBase<string opstr, RegisterOperand RO>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$size, RC:$src))],
+ [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
NoItinerary, FrmR> {
let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
@@ -699,15 +720,15 @@ multiclass AtomicCmpSwap32<PatFrag Op> {
}
}
-class LLBase<string opstr, RegisterClass RC, Operand Mem> :
- InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<string opstr, RegisterClass RC, Operand Mem> :
- InstSE<(outs RC:$dst), (ins RC:$rt, Mem:$addr),
+class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
@@ -769,42 +790,48 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<"addiu", simm16, CPURegs, immSExt16, add>,
+def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
ADDI_FM<0x9>, IsAsCheapAsAMove;
-def ADDi : ArithLogicI<"addi", simm16, CPURegs>, ADDI_FM<0x8>;
+def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
-def ANDi : ArithLogicI<"andi", uimm16, CPURegs, immZExt16, and>, ADDI_FM<0xc>;
-def ORi : ArithLogicI<"ori", uimm16, CPURegs, immZExt16, or>, ADDI_FM<0xd>;
-def XORi : ArithLogicI<"xori", uimm16, CPURegs, immZExt16, xor>, ADDI_FM<0xe>;
+def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+ ADDI_FM<0xc>;
+def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+ ADDI_FM<0xd>;
+def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+ ADDI_FM<0xe>;
def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<"addu", CPURegs, 1, IIAlu, add>, ADD_FM<0, 0x21>;
-def SUBu : ArithLogicR<"subu", CPURegs, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
-def MUL : ArithLogicR<"mul", CPURegs, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
-def ADD : ArithLogicR<"add", CPURegs>, ADD_FM<0, 0x20>;
-def SUB : ArithLogicR<"sub", CPURegs>, ADD_FM<0, 0x22>;
+def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
+def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
+def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
+def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
-def AND : ArithLogicR<"and", CPURegs, 1, IIAlu, and>, ADD_FM<0, 0x24>;
-def OR : ArithLogicR<"or", CPURegs, 1, IIAlu, or>, ADD_FM<0, 0x25>;
-def XOR : ArithLogicR<"xor", CPURegs, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
-def NOR : LogicNOR<"nor", CPURegs>, ADD_FM<0, 0x27>;
+def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
+def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
+def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
+def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : shift_rotate_imm<"sll", shamt, CPURegs, shl, immZExt5>, SRA_FM<0, 0>;
-def SRL : shift_rotate_imm<"srl", shamt, CPURegs, srl, immZExt5>, SRA_FM<2, 0>;
-def SRA : shift_rotate_imm<"sra", shamt, CPURegs, sra, immZExt5>, SRA_FM<3, 0>;
-def SLLV : shift_rotate_reg<"sllv", CPURegs, shl>, SRLV_FM<4, 0>;
-def SRLV : shift_rotate_reg<"srlv", CPURegs, srl>, SRLV_FM<6, 0>;
-def SRAV : shift_rotate_reg<"srav", CPURegs, sra>, SRLV_FM<7, 0>;
+def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+ SRA_FM<0, 0>;
+def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+ SRA_FM<2, 0>;
+def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+ SRA_FM<3, 0>;
+def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : shift_rotate_imm<"rotr", shamt, CPURegs, rotr, immZExt5>,
+ def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
SRA_FM<2, 1>;
- def ROTRV : shift_rotate_reg<"rotrv", CPURegs, rotr>, SRLV_FM<6, 1>;
+ def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
}
/// Load and Store Instructions
@@ -828,13 +855,13 @@ def SYNC : SYNC_FT, SYNC_FM;
/// Load-linked, Store-conditional
let Predicates = [NotN64, HasStdEnc] in {
- def LL : LLBase<"ll", CPURegs, mem>, LW_FM<0x30>;
- def SC : SCBase<"sc", CPURegs, mem>, LW_FM<0x38>;
+ def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>;
+ def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>;
}
let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LL_P8 : LLBase<"ll", CPURegs, mem64>, LW_FM<0x30>;
- def SC_P8 : SCBase<"sc", CPURegs, mem64>, LW_FM<0x38>;
+ def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>;
+ def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>;
}
/// Jump and Branch Instructions
@@ -853,18 +880,41 @@ def BAL_BR: BAL_FT, BAL_FM;
def JAL : JumpLink<"jal">, FJ<3>;
def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM;
-def BGEZAL : BGEZAL_FT<"bgezal", CPURegs>, BGEZAL_FM<0x11>;
-def BLTZAL : BGEZAL_FT<"bltzal", CPURegs>, BGEZAL_FM<0x10>;
+def JALRPseudo : JumpLinkRegPseudo<CPURegs, JALR, RA>;
+def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>;
+def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>;
def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall;
def RET : RetBase<CPURegs>, MTLO_FM<8>;
+// Exception handling related node and instructions.
+// The conversion sequence is:
+// ISD::EH_RETURN -> MipsISD::EH_RETURN ->
+// MIPSeh_return -> (stack change + indirect branch)
+//
+// MIPSeh_return takes the place of the regular return instruction
+// but takes two arguments (V1, V0), which are used for storing
+// the offset and the return address, respectively.
+def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+
+def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def MIPSeh_return32 : MipsPseudo<(outs), (ins CPURegs:$spoff, CPURegs:$dst),
+ [(MIPSehret CPURegs:$spoff, CPURegs:$dst)]>;
+ def MIPSeh_return64 : MipsPseudo<(outs), (ins CPU64Regs:$spoff,
+ CPU64Regs:$dst),
+ [(MIPSehret CPU64Regs:$spoff, CPU64Regs:$dst)]>;
+}
+
/// Multiply and Divide Instructions.
-def MULT : Mult<"mult", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x18>;
-def MULTu : Mult<"multu", IIImul, CPURegs, [HI, LO]>, MULT_FM<0, 0x19>;
-def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegs, [HI, LO]>, MULT_FM<0, 0x1a>;
-def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegs, [HI, LO]>,
+def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
+def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>,
+ MULT_FM<0, 0x1a>;
+def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>,
MULT_FM<0, 0x1b>;
def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>;
@@ -877,15 +927,14 @@ def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>;
def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<"clz", CPURegs>, CLO_FM<0x20>;
-def CLO : CountLeading1<"clo", CPURegs>, CLO_FM<0x21>;
+def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>;
+def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<"wsbh", CPURegs>, SEB_FM<2, 0x20>;
+def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>;
/// No operation.
-/// FIXME: NOP should be an alias of "sll $0, $0, 0".
-def NOP : InstSE<(outs), (ins), "nop", [], IIAlu, FrmJ>, NOP_FM;
+def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
// FrameIndexes are legalized when they are operands from load/store
// instructions. The same does not happen for stack address copies, so an
@@ -899,66 +948,86 @@ def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>;
def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>;
def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>;
-def RDHWR : ReadHardware<CPURegs, HWRegs>, RDHWR_FM;
+def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM;
-def EXT : ExtBase<"ext", CPURegs>, EXT_FM<0>;
-def INS : InsBase<"ins", CPURegs>, EXT_FM<4>;
+def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>;
+def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel),
+def MFC0_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
"mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
-def MTC0_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt),
+def MTC0_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
"mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
-def MFC2_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel),
+def MFC2_3OP : MFC3OP<(outs CPURegsOpnd:$rt),
+ (ins CPURegsOpnd:$rd, uimm16:$sel),
"mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
-def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt),
+def MTC2_3OP : MFC3OP<(outs CPURegsOpnd:$rd, uimm16:$sel),
+ (ins CPURegsOpnd:$rt),
"mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>;
-def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>;
-def : InstAlias<"addu $rs,$rt,$imm",
- (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"add $rs,$rt,$imm",
- (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"and $rs,$rt,$imm",
- (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"j $rs", (JR CPURegs:$rs)>;
-def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>;
-def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>;
-def : InstAlias<"slt $rs,$rt,$imm",
- (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"xor $rs,$rt,$imm",
- (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>;
-def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>;
+def : InstAlias<"move $dst, $src",
+ (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>,
+ Requires<[NotMips64]>;
+def : InstAlias<"move $dst, $src",
+ (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>;
+def : InstAlias<"addu $rs, $rt, $imm",
+ (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"add $rs, $rt, $imm",
+ (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"and $rs, $rt, $imm",
+ (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>;
+def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>;
+def : InstAlias<"not $rt, $rs",
+ (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>;
+def : InstAlias<"neg $rt, $rs",
+ (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"negu $rt, $rs",
+ (SUBu CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs), 1>;
+def : InstAlias<"slt $rs, $rt, $imm",
+ (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
+def : InstAlias<"xor $rs, $rt, $imm",
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ Requires<[NotMips64]>;
+def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+def : InstAlias<"mfc0 $rt, $rd",
+ (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd",
+ (MTC0_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
+def : InstAlias<"mfc2 $rt, $rd",
+ (MFC2_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd",
+ (MTC2_3OP CPURegsOpnd:$rd, 0, CPURegsOpnd:$rt), 0>;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
+class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>;
+def LoadImm32Reg : LoadImm32<"li", shamt,CPURegsOpnd>;
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr),
+class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>;
+def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>;
-class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> :
- MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32),
+class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>;
+def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>;
@@ -1045,7 +1114,7 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
def : MipsPat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+ (NOR CPURegsOpnd:$in, ZERO)>;
// extended loads
let Predicates = [NotN64, HasStdEnc] in {
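
The MIPSeh_return pseudos added above are the tail end of the eh.return path; the front end is the MipsISD::EH_RETURN node and the LowerEH_RETURN hook declared in MipsISelLowering.h. A hedged sketch of what that hook could look like, built only from pieces visible in this patch (setCallsEhReturn, IsN64, the V0/V1 convention) plus standard SelectionDAG calls; the actual implementation may differ in detail:

  SDValue
  MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
    // Record that this function calls llvm.eh.return so that the frame
    // lowering code creates the eh data register spill slots.
    MachineFunction &MF = DAG.getMachineFunction();
    MF.getInfo<MipsFunctionInfo>()->setCallsEhReturn();

    SDValue Chain   = Op.getOperand(0);
    SDValue Offset  = Op.getOperand(1);  // stack adjustment
    SDValue Handler = Op.getOperand(2);  // landing pad address
    DebugLoc DL     = Op.getDebugLoc();
    EVT Ty = IsN64 ? MVT::i64 : MVT::i32;

    // Per the comment on MIPSeh_return: V1 carries the offset, V0 the
    // return (handler) address; V0_64/V1_64 are assumed for N64.
    unsigned OffsetReg = IsN64 ? Mips::V1_64 : Mips::V1;
    unsigned AddrReg   = IsN64 ? Mips::V0_64 : Mips::V0;
    Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
    Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));

    // Pass the registers as extra operands so they stay live into the
    // variadic MipsISD::EH_RETURN node; the node is matched to
    // MIPSeh_return32/64 and later expanded into the stack increment plus
    // indirect branch shown in the MipsSEInstrInfo.cpp hunk below.
    return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
                       DAG.getRegister(OffsetReg, Ty),
                       DAG.getRegister(AddrReg, Ty), Chain.getValue(1));
  }
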
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 30f68b1..2efe534 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -10,10 +10,10 @@
// This pass expands a branch or jump instruction into a long branch if its
// offset is too large to fit into its immediate field.
//
-// FIXME:
-// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
+// FIXME:
+// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
// 2. If program has inline assembly statements whose size cannot be
-// determined accurately, load branch target addresses from the GOT.
+// determined accurately, load branch target addresses from the GOT.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-long-branch"
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 0c71596..59b23f7 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -56,4 +56,20 @@ unsigned MipsFunctionInfo::getMips16SPAliasReg() {
return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC);
}
+void MipsFunctionInfo::createEhDataRegsFI() {
+ for (int I = 0; I < 4; ++I) {
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+ const TargetRegisterClass *RC = ST.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ RC->getAlignment(), false);
+ }
+}
+
+bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
+ return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
+ || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index eb6e1cf..b05b348 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -53,10 +53,16 @@ class MipsFunctionInfo : public MachineFunctionInfo {
/// Size of incoming argument area.
unsigned IncomingArgSize;
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0)
+ VarArgsFrameIndex(0), CallsEhReturn(false)
{}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
@@ -78,6 +84,14 @@ public:
}
unsigned getIncomingArgSize() const { return IncomingArgSize; }
+
+ bool callsEhReturn() const { return CallsEhReturn; }
+ void setCallsEhReturn() { CallsEhReturn = true; }
+
+ void createEhDataRegsFI();
+ int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
+ bool isEhDataRegFI(int FI) const;
+
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 70eb6f3..3250733 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -47,6 +47,28 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+
+unsigned
+MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case Mips::CPURegsRegClassID:
+ case Mips::CPU64RegsRegClassID:
+ case Mips::DSPRegsRegClassID: {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return 28 - TFI->hasFP(MF);
+ }
+ case Mips::FGR32RegClassID:
+ return 32;
+ case Mips::AFGR64RegClassID:
+ return 16;
+ case Mips::FGR64RegClassID:
+ return 32;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Callee Saved Registers methods
//===----------------------------------------------------------------------===//
@@ -155,21 +177,14 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
// direct reference.
void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const {
+ unsigned FIOperandNum, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- unsigned i = 0;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
errs() << "<--------->\n" << MI);
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
uint64_t stackSize = MF.getFrameInfo()->getStackSize();
int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
@@ -177,7 +192,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- eliminateFI(MI, i, FrameIndex, stackSize, spOffset);
+ eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
unsigned MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 78adf7f..13b2a6a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,6 +42,8 @@ public:
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
@@ -53,7 +55,8 @@ public:
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f07a10c..f93dd86 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -331,3 +331,48 @@ def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
// Accumulator Registers
def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+
+def CPURegsAsmOperand : AsmOperandClass {
+ let Name = "CPURegsAsm";
+ let ParserMethod = "parseCPURegs";
+}
+
+def CPU64RegsAsmOperand : AsmOperandClass {
+ let Name = "CPU64RegsAsm";
+ let ParserMethod = "parseCPU64Regs";
+}
+
+def CCRAsmOperand : AsmOperandClass {
+ let Name = "CCRAsm";
+ let ParserMethod = "parseCCRRegs";
+}
+
+def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> {
+ let ParserMatchClass = CPURegsAsmOperand;
+}
+
+def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> {
+ let ParserMatchClass = CPU64RegsAsmOperand;
+}
+
+def CCROpnd : RegisterOperand<CCR, "printCPURegs"> {
+ let ParserMatchClass = CCRAsmOperand;
+}
+
+def HWRegsAsmOperand : AsmOperandClass {
+ let Name = "HWRegsAsm";
+ let ParserMethod = "parseHWRegs";
+}
+
+def HW64RegsAsmOperand : AsmOperandClass {
+ let Name = "HW64RegsAsm";
+ let ParserMethod = "parseHW64Regs";
+}
+
+def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> {
+ let ParserMatchClass = HWRegsAsmOperand;
+}
+
+def HW64RegsOpnd : RegisterOperand<HWRegs64, "printCPURegs"> {
+ let ParserMatchClass = HW64RegsAsmOperand;
+}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 60b1233..0dd6713 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -29,9 +29,21 @@
using namespace llvm;
+unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
+ static const unsigned EhDataReg[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned EhDataReg64[] = {
+ Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64
+ };
+
+ return STI.isABI_N64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsRegisterInfo *RegInfo =
static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
@@ -105,6 +117,30 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Insert instructions that spill eh data registers.
+ for (int I = 0; I < 4; ++I) {
+ if (!MBB.isLiveIn(ehDataReg(I)))
+ MBB.addLiveIn(ehDataReg(I));
+ TII.storeRegToStackSlot(MBB, MBBI, ehDataReg(I), false,
+ MipsFI->getEhDataRegFI(I), RC, RegInfo);
+ }
+
+ // Emit .cfi_offset directives for eh data registers.
+ MCSymbol *CSLabel2 = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl,
+ TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel2);
+ for (int I = 0; I < 4; ++I) {
+ int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
+ DstML = MachineLocation(MachineLocation::VirtualFP, Offset);
+ SrcML = MachineLocation(ehDataReg(I));
+ Moves.push_back(MachineMove(CSLabel2, DstML, SrcML));
+ }
+ }
+
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
@@ -124,6 +160,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
@@ -144,6 +183,22 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
+ if (MipsFI->callsEhReturn()) {
+ const TargetRegisterClass *RC = STI.isABI_N64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass;
+
+ // Find first instruction that restores a callee-saved register.
+ MachineBasicBlock::iterator I = MBBI;
+ for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ --I;
+
+ // Insert instructions that restore eh data registers.
+ for (int J = 0; J < 4; ++J) {
+ TII.loadRegFromStackSlot(MBB, I, ehDataReg(J), MipsFI->getEhDataRegFI(J),
+ RC, RegInfo);
+ }
+ }
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI->getStackSize();
@@ -194,16 +249,41 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
}
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsSEFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ TII.adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
+
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// Mark $fp as used if function has dedicated frame pointer.
if (hasFP(MF))
MRI.setPhysRegUsed(FP);
+ // Create spill slots for eh data registers if function calls eh_return.
+ if (MipsFI->callsEhReturn())
+ MipsFI->createEhDataRegsFI();
+
// Set scavenging frame index if necessary.
uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() +
estimateStackSize(MF);
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 6481a0a..7becd25 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -28,6 +28,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -37,6 +41,7 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
+ unsigned ehDataReg(unsigned I) const;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index cd8f9f4..a9809ef 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -220,6 +220,10 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::ExtractElementF64:
ExpandExtractElementF64(MBB, MI);
break;
+ case Mips::MIPSeh_return32:
+ case Mips::MIPSeh_return64:
+ ExpandEhReturn(MBB, MI);
+ break;
}
MBB.erase(MI);
@@ -356,6 +360,31 @@ void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB,
.addReg(HiReg);
}
+void MipsSEInstrInfo::ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // This pseudo instruction is generated as part of the lowering of
+ // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg and an
+ // indirect jump to TargetReg.
+ const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+ unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned OR = STI.isABI_N64() ? Mips::OR64 : Mips::OR;
+ unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned OffsetReg = I->getOperand(0).getReg();
+ unsigned TargetReg = I->getOperand(1).getReg();
+
+ // or $ra, $v0, $zero
+ // addu $sp, $sp, $v1
+ // jr $ra
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(OR), RA)
+ .addReg(TargetReg).addReg(ZERO);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
+ .addReg(SP).addReg(OffsetReg);
+ BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+}
+
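The function above picks its opcodes and physical registers purely by ABI. A condensed sketch of that selection, using only names that appear in this patch (the struct and helper are illustrative, not part of the change):

struct EhReturnOpcodes { unsigned Add, Or, Jr, SP, RA, Zero; };

static EhReturnOpcodes pickEhReturnOpcodes(bool IsN64) {
  // N64 uses the 64-bit register file and the doubleword add.
  if (IsN64)
    return { Mips::DADDu, Mips::OR64, Mips::JR64,
             Mips::SP_64, Mips::RA_64, Mips::ZERO_64 };
  return { Mips::ADDu, Mips::OR, Mips::JR, Mips::SP, Mips::RA, Mips::ZERO };
}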
const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
return new MipsSEInstrInfo(TM);
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 55b78b2..3e22b33 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -85,6 +85,8 @@ private:
MachineBasicBlock::iterator I) const;
void ExpandBuildPairF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
+ void ExpandEhReturn(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
}
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index abeab7b..a39b393 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -54,28 +54,6 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void MipsSERegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII);
- unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
-
- II->adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
@@ -83,6 +61,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -93,15 +72,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
}
+ bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
+
// The following stack frame objects are always referenced relative to $sp:
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
+ // 4. Locations for eh data registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
- if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 7437bd3..f6827e9 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -31,10 +31,6 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 30d377a..75b4c98 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -26,13 +26,14 @@ void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model RM) :
+ Reloc::Model _RM) :
MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
HasBitCount(false), HasFPIdx(false),
- InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false)
+ InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ IsAndroid(false), RM(_RM)
{
std::string CPUName = CPU;
if (CPUName.empty())
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 6a20815..32baa3d 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,6 +14,7 @@
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
+#include "MCTargetDesc/MipsReginfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -88,6 +89,9 @@ protected:
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+ // InMicroMips -- can process MicroMips instructions
+ bool InMicroMipsMode;
+
// HasDSP, HasDSPR2 -- supports DSP ASE.
bool HasDSP, HasDSPR2;
@@ -96,6 +100,12 @@ protected:
InstrItineraryData InstrItins;
+ // The instance of the register info section object
+ MipsReginfo MRI;
+
+ // Relocation Model
+ Reloc::Model RM;
+
public:
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -131,6 +141,7 @@ public:
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
+ bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
bool isAndroid() const { return IsAndroid; }
@@ -145,6 +156,12 @@ public:
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
bool hasFPIdx() const { return HasFPIdx; }
+
+ // Grab the MipsReginfo object
+ const MipsReginfo &getMReginfo() const { return MRI; }
+
+ // Grab relocation model
+ Reloc::Model getRelocationModel() const {return RM;}
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 9aea764..4c748c5 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -38,6 +38,20 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
+ // Register info section
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ if (Subtarget.isABI_N64() || Subtarget.isABI_N32())
+ ReginfoSection =
+ getContext().getELFSection(".MIPS.options",
+ ELF::SHT_MIPS_OPTIONS,
+ ELF::SHF_ALLOC |ELF::SHF_MIPS_NOSTRIP,
+ SectionKind::getMetadata());
+ else
+ ReginfoSection =
+ getContext().getELFSection(".reginfo",
+ ELF::SHT_MIPS_REGINFO,
+ ELF::SHF_ALLOC,
+ SectionKind::getMetadata());
}
// A address must be loaded from a small section if its size is less than the
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index c394a9d..c0e9140 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -17,6 +17,7 @@ namespace llvm {
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
+ const MCSection *ReginfoSection;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -35,6 +36,7 @@ namespace llvm {
const TargetMachine &TM) const;
// TODO: Classify globals as mips wishes.
+ const MCSection *getReginfoSection() const { return ReginfoSection; }
};
} // end namespace llvm
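The new getReginfoSection() accessor hands back either .reginfo (O32) or .MIPS.options (N32/N64), but no caller appears in this part of the patch. A hedged sketch of how an emitter might consume it, assuming an MCStreamer-style interface; the helper name and emission path are assumptions:

void emitMipsReginfo(MCStreamer &OutStreamer,
                     const MipsTargetObjectFile &TLOF) {
  const MCSection *S = TLOF.getReginfoSection();
  if (!S)
    return;
  // Switch to .reginfo (O32) or .MIPS.options (N32/N64) as selected in
  // MipsTargetObjectFile::Initialize(), then emit the register-usage masks.
  OutStreamer.SwitchSection(S);
}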
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7cb16b4..47baef6 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
- VectorElementize.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 1d41665..6191819 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -30,8 +30,9 @@ void NVPTXMCAsmInfo::anchor() { }
NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::nvptx64)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::nvptx64) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
CommentString = "//";
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 097b50a..b46ea88 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 22da8f3..0115e1f 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
- // Vector virtual register
- if (getNVPTXVectorSize(RC) == 4)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
- << "}";
- else if (getNVPTXVectorSize(RC) == 2)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
- << "}";
- else
- llvm_unreachable("Unsupported vector size");
+ report_fatal_error("Bad register!");
}
void
@@ -1314,7 +1300,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
O << "shared" ;
break;
default:
- llvm_unreachable("unexpected address space");
+ report_fatal_error("Bad address space found while emitting PTX");
+ break;
}
}
@@ -2023,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
- case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
- case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
- case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
- case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
- case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
- case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
- case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
- case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
- case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
- case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
- case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
- case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
- case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
- case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
- case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
- case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
- case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
- case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2056,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
- case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
- case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
- case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
- case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
- case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
- case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
- case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
- case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
- case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 50072c5..bb2c55c 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -74,3 +74,14 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void NVPTXFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index ee87b39..d34e7be 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -33,6 +33,10 @@ public:
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 36ab7f5..481f13a 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default: break;
}
if (ResNode)
return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N1.getNode(), N1, Base, Offset):
SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
MVT::Other, Ops, 8);
}
else {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsigned : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType =
+ cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
+ case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
+ default: return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N2.getNode(), N2, Base, Offset):
SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl,
MVT::Other, Ops, 9);
} else {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return NVPTXST;
}
+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *ST;
+ EVT EltVT = Op1.getValueType();
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT StoreVT = MemSD->getMemoryVT();
+
+ // Address Space Setting
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+ report_fatal_error("Cannot store to pointer that points to constant "
+ "memory space");
+ }
+
+ // Volatile Setting
+ // - .volatile is only available for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ assert(StoreVT.isSimple() && "Store value is not simple");
+ MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToType;
+ if (ScalarVT.isFloatingPoint())
+ ToType = NVPTX::PTXLdStInstCode::Float;
+ else
+ ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+
+ SmallVector<SDValue, 12> StOps;
+ SDValue N2;
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::StoreV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ N2 = N->getOperand(3);
+ break;
+ case NVPTXISD::StoreV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ StOps.push_back(N->getOperand(3));
+ StOps.push_back(N->getOperand(4));
+ N2 = N->getOperand(5);
+ break;
+ default: return NULL;
+ }
+
+ StOps.push_back(getI32Imm(IsVolatile));
+ StOps.push_back(getI32Imm(CodeAddrSpace));
+ StOps.push_back(getI32Imm(VecType));
+ StOps.push_back(getI32Imm(ToType));
+ StOps.push_back(getI32Imm(ToTypeWidth));
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
+ }
+ break;
+ }
+ StOps.push_back(Addr);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset):
+ SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
+ }
+ break;
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N2.getNode(), N2, Base, Offset):
+ SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(N2);
+ }
+
+ StOps.push_back(Chain);
+
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return ST;
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 14f2091..4ec9241 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -72,8 +72,11 @@ private:
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
- SDNode* SelectLoad(SDNode *N);
- SDNode* SelectStore(SDNode *N);
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectLoadVector(SDNode *N);
+ SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectStoreVector(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b3ab9fc..5ee747a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -45,15 +45,27 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
- cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
- cl::init(true));
-
-static cl::opt<bool>
sched4reg("nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"),
cl::init(false));
+static bool IsPTXVectorType(MVT VT) {
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::v2i8:
+ case MVT::v4i8:
+ case MVT::v2i16:
+ case MVT::v4i16:
+ case MVT::v2i32:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return true;
+ }
+}
+
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
@@ -63,9 +75,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
// then generating calls to memset, mempcy or memmove.
- maxStoresPerMemset = (unsigned)0xFFFFFFFF;
- maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
- maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
+ MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- if (RetainVectorOperands) {
- addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
- addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
- addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
- addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
- addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
- addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
- addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
- addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
- addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
- addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
- }
-
// Operations not directly supported by NVPTX.
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- // By default, CONCAT_VECTORS is implemented via store/load
- // through stack. It is slow and uses local memory. We need
- // to custom-lowering them.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
-
- // Expand vector int to float and float to int conversions
- // - For SINT_TO_FP and UINT_TO_FP, the src type
- // (Node->getOperand(0).getValueType())
- // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
- // the dest type (Node->getValueType(0)) is used.
- //
- // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
- // case, and
- // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
- //
- // That is why v4i32 or v2i32 are used here.
- //
- // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
- // (LegalizeVectorOps.cpp).
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ // Register custom handling for vector loads/stores
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (IsPTXVectorType(VT)) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+ }
+ }
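The loop above defers to IsPTXVectorType(), a file-static helper added earlier in this file and not visible in this excerpt. A minimal sketch of what such a predicate presumably looks like, assuming it simply whitelists the same "native" vector types that LowerSTOREVector() and ReplaceLoadVector() later switch over:

    // Sketch only: accept exactly the vector types PTX can load/store with a
    // single ld.v2/ld.v4 (st.v2/st.v4); everything else keeps the default
    // action.
    static bool IsPTXVectorType(MVT VT) {
      switch (VT.SimpleTy) {
      default:
        return false;
      case MVT::v2i8:  case MVT::v4i8:
      case MVT::v2i16: case MVT::v4i16:
      case MVT::v2i32: case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v2f32: case MVT::v4f32:
      case MVT::v2f64:
        return true;
      }
    }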
// Now deduce the information based on the above mentioned
// actions
@@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
}
}
@@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::i1)
+ return LowerLOADi1(Op, DAG);
+ else
+ return SDValue();
+}
+
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
@@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, dl);
}
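The hunk above shows only the tail of LowerLOADi1(); the comment before it describes the rewrite. A minimal sketch of that rewrite as a standalone helper, assuming the usual SelectionDAG extending-load facilities are used (the helper name and exact calls are illustrative, not the elided body):

    // Sketch only: widen the i1 load to an i8 memory access held in an i16
    // register, then truncate back to i1 for the original users.
    static SDValue sketchLowerLoadI1(LoadSDNode *LD, SelectionDAG &DAG,
                                     DebugLoc dl) {
      SDValue NewLD =
          DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i16, LD->getChain(),
                         LD->getBasePtr(), LD->getPointerInfo(), MVT::i8,
                         LD->isVolatile(), LD->isNonTemporal(),
                         LD->getAlignment());
      SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
      SDValue Ops[] = { Result, NewLD.getValue(1) }; // value + chain
      return DAG.getMergeValues(Ops, 2, dl);
    }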
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValVT = Op.getOperand(1).getValueType();
+ if (ValVT == MVT::i1)
+ return LowerSTOREi1(Op, DAG);
+ else if (ValVT.isVector())
+ return LowerSTOREVector(Op, DAG);
+ else
+ return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue Val = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT ValVT = Val.getValueType();
+
+ if (ValVT.isVector()) {
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 stores of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ if (!ValVT.isSimple())
+ return SDValue();
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ unsigned Opcode = 0;
+ EVT EltVT = ValVT.getVectorElementType();
+ unsigned NumElts = ValVT.getVectorNumElements();
+
+ // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // stored type to i16 and propagate the "real" type as the memory type.
+ bool NeedExt = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExt = true;
+
+ switch (NumElts) {
+ default: return SDValue();
+ case 2:
+ Opcode = NVPTXISD::StoreV2;
+ break;
+ case 4:
+ Opcode = NVPTXISD::StoreV4;
+ break;
+ }
+
+ SmallVector<SDValue, 8> Ops;
+
+ // First is the chain
+ Ops.push_back(N->getOperand(0));
+
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i));
+ if (NeedExt)
+ // ANY_EXTEND is correct here since the store will only look at the
+ // lower-order bits anyway.
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
+
+ // Then any remaining arguments
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+
+ SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
+ DAG.getVTList(MVT::Other), &Ops[0],
+ Ops.size(), MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
+
+ return NewSt;
+ }
+
+ return SDValue();
+}
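A recurring convention in this lowering is that sub-16-bit elements (i8, and i1 once legalized) are kept in i16 registers while the memory VT attached to the target node preserves the true element width for instruction selection. A minimal sketch of that convention, with hypothetical helper names:

    // Sketch only: the register<->memory type convention for narrow elements.
    static SDValue widenEltForReg(SelectionDAG &DAG, DebugLoc DL, SDValue Elt) {
      if (Elt.getValueType().getSizeInBits() < 16)
        // Only the low bits reach memory, so ANY_EXTEND is sufficient.
        return DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, Elt);
      return Elt;
    }

    static SDValue narrowEltFromReg(SelectionDAG &DAG, DebugLoc DL, SDValue Elt,
                                    EVT OrigEltVT) {
      if (OrigEltVT.getSizeInBits() < 16)
        // The vector load produced an i16; recover the original element type.
        return DAG.getNode(ISD::TRUNCATE, DL, OrigEltVT, Elt);
      return Elt;
    }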
+
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -1027,9 +1090,11 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
if (isABI || isKernel) {
// If ABI, load from the param symbol
SDValue Arg = getParamSymbol(DAG, idx);
- Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT(
- F->getContext()),
- llvm::ADDRESS_SPACE_PARAM));
+ // Conjure up a value that we can get the address space from.
+ // FIXME: Using a constant here is a hack.
+ Value *srcValue = Constant::getNullValue(PointerType::get(
+ ObjectVT.getTypeForEVT(F->getContext()),
+ llvm::ADDRESS_SPACE_PARAM));
SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
MachinePointerInfo(srcValue), false, false,
false,
@@ -1346,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
+
+/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue>& Results) {
+ EVT ResVT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ assert(ResVT.isVector() && "Vector load must have vector type");
+
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 loads of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ assert(ResVT.isSimple() && "Can only handle simple types");
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default: return;
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ EVT EltVT = ResVT.getVectorElementType();
+ unsigned NumElts = ResVT.getVectorNumElements();
+
+ // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default: return;
+ case 2:
+ Opcode = NVPTXISD::LoadV2;
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // The select routine does not have access to the LoadSDNode instance, so
+ // pass along the extension information
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), LD->getMemoryVT(),
+ LD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+}
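For concreteness, the net effect of ReplaceLoadVector() on a legal <2 x float> load is roughly the following (a sketch with illustrative names and a simplified operand list, not the code above):

    // Sketch only: a v2f32 load becomes a LoadV2 with two f32 results plus a
    // chain, and a BUILD_VECTOR reassembles the value the users expect.
    static void sketchReplaceV2F32Load(LoadSDNode *LD, SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &Results) {
      DebugLoc DL = LD->getDebugLoc();
      SDVTList VTs = DAG.getVTList(MVT::f32, MVT::f32, MVT::Other);
      SDValue Ops[] = { LD->getChain(), LD->getBasePtr(),
                        DAG.getIntPtrConstant(ISD::NON_EXTLOAD) };
      SDValue LdV2 = DAG.getMemIntrinsicNode(NVPTXISD::LoadV2, DL, VTs, Ops, 3,
                                             LD->getMemoryVT(),
                                             LD->getMemOperand());
      SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
                                LdV2.getValue(0), LdV2.getValue(1));
      Results.push_back(Vec);              // replaces the vector value
      Results.push_back(LdV2.getValue(2)); // replaces the chain
    }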
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Intrin = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Get the intrinsic ID
+ unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p: {
+ EVT ResVT = N->getValueType(0);
+
+ if (ResVT.isVector()) {
+ // Vector LDG/LDU
+
+ unsigned NumElts = ResVT.getVectorNumElements();
+ EVT EltVT = ResVT.getVectorElementType();
+
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propagate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default: return;
+ case 2:
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV2;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV2;
+ break;
+ }
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV4;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV4;
+ break;
+ }
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+
+ OtherOps.push_back(Chain); // Chain
+ // Skip operand 1 (the intrinsic ID) and copy the remaining operands
+ // (the address) as-is.
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+ } else {
+ // i8 LDG/LDU
+ assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+ "Custom handling of non-i8 ldu/ldg?");
+
+ // Just copy all operands as-is
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
+ // Force output to i16
+ SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ // We make sure the memory type is i8, which will be used during isel
+ // to select the proper instruction.
+ SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+ LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8,
+ MemSD->getMemOperand());
+
+ Results.push_back(NewLD.getValue(0));
+ Results.push_back(NewLD.getValue(1));
+ }
+ }
+ }
+}
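As a side note on the structure above: the two nested switches implement a single (intrinsic, arity) to opcode mapping, which could equivalently be written as a small helper such as the following sketch (not part of the patch):

    // Sketch only: a flattened form of the LDG/LDU opcode selection above.
    static unsigned sketchPickLDGLDUOpcode(unsigned IID, unsigned NumElts) {
      bool IsLDG = IID == Intrinsic::nvvm_ldg_global_i ||
                   IID == Intrinsic::nvvm_ldg_global_f ||
                   IID == Intrinsic::nvvm_ldg_global_p;
      if (NumElts == 2)
        return IsLDG ? NVPTXISD::LDGV2 : NVPTXISD::LDUV2;
      assert(NumElts == 4 && "only the v2 and v4 forms are handled");
      return IsLDG ? NVPTXISD::LDGV4 : NVPTXISD::LDUV4;
    }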
+
+void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: report_fatal_error("Unhandled custom legalization");
+ case ISD::LOAD:
+ ReplaceLoadVector(N, DAG, Results);
+ return;
+ case ISD::INTRINSIC_W_CHAIN:
+ ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+ return;
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 0a1833a..95e7b55 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -58,7 +58,16 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
- Dummy
+ Dummy,
+
+ LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LoadV4,
+ LDGV2, // LDG.v2
+ LDGV4, // LDG.v4
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4
};
}
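Starting the new opcodes at ISD::FIRST_TARGET_MEMORY_OPCODE is what lets them be built with getMemIntrinsicNode() and carry a MachineMemOperand: the DAG treats any opcode at or above that value as a memory node. A one-line sketch of the property this relies on:

    // Sketch only: how the DAG recognizes these as memory-accessing nodes.
    static bool sketchIsTargetMemOpcode(unsigned Opcode) {
      return Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE;
    }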
@@ -143,8 +152,16 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 6fe654cb..9e73d80 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
- NVPTX::V4F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
- NVPTX::V4I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
- NVPTX::V2F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
- NVPTX::V2I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
- NVPTX::V4I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
- NVPTX::V2I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
- NVPTX::V4I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
- NVPTX::V2I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
- NVPTX::V2I64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
- NVPTX::V2F64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8a410b8..f43abe2 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr];"), []>;
+ def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr];"), []>;
def _ari : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _ari_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr+$offset];"), []>;
def _asi : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
-let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
-defm LD_v2i8 : LD<V2I8Regs>;
-defm LD_v4i8 : LD<V4I8Regs>;
-defm LD_v2i16 : LD<V2I16Regs>;
-defm LD_v4i16 : LD<V4I16Regs>;
-defm LD_v2i32 : LD<V2I32Regs>;
-defm LD_v4i32 : LD<V4I32Regs>;
-defm LD_v2f32 : LD<V2F32Regs>;
-defm LD_v4f32 : LD<V4F32Regs>;
-defm LD_v2i64 : LD<V2I64Regs>;
-defm LD_v2f64 : LD<V2F64Regs>;
-}
-
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr], $src;"), []>;
+ def _areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr], $src;"), []>;
def _ari : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr+$offset], $src;"), []>;
+ def _ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr+$offset], $src;"), []>;
def _asi : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
-let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
-defm ST_v2i8 : ST<V2I8Regs>;
-defm ST_v4i8 : ST<V4I8Regs>;
-defm ST_v2i16 : ST<V2I16Regs>;
-defm ST_v4i16 : ST<V4I16Regs>;
-defm ST_v2i32 : ST<V2I32Regs>;
-defm ST_v4i32 : ST<V4I32Regs>;
-defm ST_v2f32 : ST<V2F32Regs>;
-defm ST_v4f32 : ST<V4F32Regs>;
-defm ST_v2i64 : ST<V2I64Regs>;
-defm ST_v2f64 : ST<V2F64Regs>;
-}
-
// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
def _v4_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
-include "NVPTXVector.td"
-
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 028a94b..49e2568 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
-// Vector ldu
-multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
- NVPTXInst eleInst, NVPTXInst eleInst64> {
- def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
- Requires<[hasLDU]>;
- def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
- Requires<[hasLDU]>;
+
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+defm INT_PTX_LDG_GLOBAL_i8
+ : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
-let VecInstType=isVecLD.Value in {
-defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
- V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
- INT_PTX_LDU_G_v2i8_ELE_64>;
-defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
- V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
- INT_PTX_LDU_G_v4i8_ELE_64>;
-defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
- V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
- INT_PTX_LDU_G_v2i16_ELE_64>;
-defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
- V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
- INT_PTX_LDU_G_v4i16_ELE_64>;
-defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
- V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
- INT_PTX_LDU_G_v2i32_ELE_64>;
-defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
- V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
- INT_PTX_LDU_G_v4i32_ELE_64>;
-defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
- V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
- INT_PTX_LDU_G_v2f32_ELE_64>;
-defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
- V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
- INT_PTX_LDU_G_v4f32_ELE_64>;
-defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
- V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
- INT_PTX_LDU_G_v2i64_ELE_64>;
-defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
- V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
- INT_PTX_LDU_G_v2f64_ELE_64>;
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+ : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+ : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+ : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+ : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+ : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+ : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+ : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+ : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+ : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+ : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 08be917..350a2c5 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return ".v2.f32";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return ".v4.f32";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return ".v2.s32";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return ".v4.s32";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return ".v2.f64";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return ".v2.s64";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return ".v4.s16";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return ".v4.s16";
- }
else {
return "INTERNAL";
}
@@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return "%v2f";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return "%v4f";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return "%v2r";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return "%v4r";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return "%v2fd";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return "%v2rd";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return "%v2s";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return "%v4rs";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return "%v2rc";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return "%v4rc";
- }
else {
return "INTERNAL";
}
return "";
}
-
-bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return true;
- return false;
-}
-
-std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return (&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return (&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-int getNVPTXVectorSize(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return 4;
- llvm_unreachable("Not a vector register class");
-}
}
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
@@ -277,30 +121,22 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
void NVPTXRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
+ int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum+1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
}
-
int NVPTXRegisterInfo::
getDwarfRegNum(unsigned RegNum, bool isEH) const {
return 0;
@@ -314,12 +150,3 @@ unsigned NVPTXRegisterInfo::getRARegister() const {
return 0;
}
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void NVPTXRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN,
- // ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 5951783..69f73f2 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -55,13 +55,9 @@ public:
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj,
+ int SPAdj, unsigned FIOperandNum,
RegScavenger *RS=NULL) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const;
virtual unsigned getRARegister() const;
@@ -81,10 +77,6 @@ public:
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
-bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
-std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
-int getNVPTXVectorSize (const TargetRegisterClass *RC);
-const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ba15825..8d100d6 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -37,9 +37,6 @@ foreach i = 0-395 in {
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
- // Vectors
- foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
- def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-def V2F32Regs
- : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Float32Regs, 2, ".v2.f32">;
-def V4F32Regs
- : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Float32Regs, 4, ".v4.f32">;
-def V2I32Regs
- : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Int32Regs, 2, ".v2.u32">;
-def V4I32Regs
- : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Int32Regs, 4, ".v4.u32">;
-def V2F64Regs
- : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Float64Regs, 2, ".v2.f64">;
-def V2I64Regs
- : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Int64Regs, 2, ".v2.u64">;
-def V2I16Regs
- : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
- Int16Regs, 2, ".v2.u16">;
-def V4I16Regs
- : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
- Int16Regs, 4, ".v4.u16">;
-def V2I8Regs
- : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
- Int8Regs, 2, ".v2.u8">;
-def V4I8Regs
- : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
- Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index e6cb7c2..beea77e 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -57,6 +57,7 @@ public:
bool hasF32FTZ() const { return SmVersion >= 20; }
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDG() const { return SmVersion >= 32; }
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index b4e049e..cd765fa 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() {
addPass(createSplitBBatBarPass());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
- addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
deleted file mode 100644
index f1b285d..0000000
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ /dev/null
@@ -1,1239 +0,0 @@
-//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts operations on vector types to operations on their
-// element types.
-//
-// For generic binary and unary vector instructions, the conversion is simple.
-// Suppose we have
-// av = bv Vop cv
-// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
-// This gets converted to the following :
-// a1 = b1 Sop c1
-// a2 = b2 Sop c2
-//
-// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
-// For the above example, the map will look as follows:
-// av => [a1, a2]
-// bv => [b1, b2]
-//
-// In addition, initVectorInfo creates the following opcode->opcode map.
-// Vop => Sop
-// OtherVop => OtherSop
-// ...
-//
-// For vector specific instructions like vecbuild, vecshuffle etc, the
-// conversion is different. Look at comments near the functions with
-// prefix createVec<...>.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
- virtual bool runOnMachineFunction(MachineFunction &F);
-
- NVPTXTargetMachine &TM;
- MachineRegisterInfo *MRI;
- const NVPTXRegisterInfo *RegInfo;
- const NVPTXInstrInfo *InstrInfo;
-
- llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
- RegClassMap;
- llvm::DenseMap<unsigned, bool> SimpleMoveMap;
-
- llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
-
- bool isVectorInstr(MachineInstr *);
-
- SmallVector<unsigned, 4> getScalarRegisters(unsigned);
- unsigned getScalarVersion(unsigned);
- unsigned getScalarVersion(MachineInstr *);
-
- bool isVectorRegister(unsigned);
- const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
- unsigned numCopiesNeeded(MachineInstr *);
-
- void createLoadCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
- void createStoreCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createVecDest(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createCopies(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- unsigned copyProp(MachineFunction&);
- unsigned removeDeadMoves(MachineFunction&);
-
- void elementize(MachineFunction&);
-
- bool isSimpleMove(MachineInstr *);
-
- void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
-public:
-
- static char ID; // Pass identification, replacement for typeid
- VectorElementize(NVPTXTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) {}
-
- virtual const char *getPassName() const {
- return "Convert LLVM vector types to their element types";
- }
-};
-
-char VectorElementize::ID = 1;
-}
-
-static cl::opt<bool>
-RemoveRedundantMoves("nvptx-remove-redundant-moves",
- cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
- cl::init(true));
-
-#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
- >> NVPTX::VecInstTypeShift)
-#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
-#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
-#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
-#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
-#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
-#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
-#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
-#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
-
-bool VectorElementize::isSimpleMove(MachineInstr *mi) {
- if (mi->isCopy())
- return true;
- unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
- >> NVPTX::SimpleMoveShift;
- return (TSFlags == 1);
-}
-
-bool VectorElementize::isVectorInstr(MachineInstr *mi) {
- if ((mi->getOpcode() == NVPTX::PHI) ||
- (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
- MachineOperand dest = mi->getOperand(0);
- return isVectorRegister(dest.getReg());
- }
- return ISVECINST(mi);
-}
-
-unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
- return getScalarVersion(mi->getOpcode());
-}
-
-///=============================================================================
-///Instr is assumed to be a vector instruction. For most vector instructions,
-///the size of the destination vector register gives the number of scalar copies
-///needed. For VecStore, size of getOperand(1) gives the number of scalar copies
-///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the
-///number of scalar copies needed.
-///=============================================================================
-unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
- unsigned numDefs=0;
- unsigned def;
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
-
- if (!oper.isReg()) continue;
- if (!oper.isDef()) continue;
- def = i;
- numDefs++;
- }
- assert((numDefs <= 1) && "Only 0 or 1 defs supported");
-
- if (numDefs == 1) {
- unsigned regnum = Instr->getOperand(def).getReg();
- if (ISVECEXTRACT(Instr))
- regnum = Instr->getOperand(1).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- else if (numDefs == 0) {
- assert(ISVECSTORE(Instr)
- && "Only 0 def instruction supported is vector store");
-
- unsigned regnum = Instr->getOperand(0).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- return 1;
-}
-
-const TargetRegisterClass *VectorElementize::
-getScalarRegClass(const TargetRegisterClass *RC) {
- assert(isNVPTXVectorRegClass(RC) &&
- "Not a vector register class");
- return getNVPTXElemClass(RC);
-}
-
-bool VectorElementize::isVectorRegister(unsigned reg) {
- const TargetRegisterClass *RC=MRI->getRegClass(reg);
- return isNVPTXVectorRegClass(RC);
-}
-
-///=============================================================================
-///For every vector register 'v' that is not already in the VectorToScalarMap,
-///create n scalar registers of the corresponding element type, where n
-///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap.
-///=============================================================================
-SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
- assert(isVectorRegister(regnum) && "Expecting a vector register here");
- // Create the scalar registers and put them in the map, if not already there.
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
- const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
- const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
-
- SmallVector<unsigned, 4> temp;
-
- for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
- temp.push_back(MRI->createVirtualRegister(scalarClass));
-
- VectorToScalarMap[regnum] = temp;
- }
- return VectorToScalarMap[regnum];
-}
-
-///=============================================================================
-///For a vector load of the form
-///va <= ldv2 [addr]
-///the following multi output instruction is created :
-///[v1, v2] <= LD [addr]
-///Look at NVPTXVector.td for the definitions of multi output loads.
-///=============================================================================
-void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, that should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i], RegState::Define);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-
-}
-
-///=============================================================================
-///For a vector store of the form
-///stv2 va, [addr]
-///the following multi input instruction is created :
-///ST v1, v2, [addr]
-///Look at NVPTXVector.td for the definitions of multi input stores.
-///=============================================================================
-void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- MachineOperand src = copy->getOperand(0);
- unsigned regnum = src.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i]);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///va <= shufflev2 vb, vc, <i1>, <i2>
-///gets converted to 2 moves into a1 and a2. The source of the moves depend on
-///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
-///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move
-///instructions will be
-///a1 <= c2
-///a2 <= b1
-///=============================================================================
-void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned src1regnum = Instr->getOperand(1).getReg();
- unsigned src2regnum = Instr->getOperand(2).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
- SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstrBuilder copy =
- BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
- MachineOperand which=Instr->getOperand(3+i);
- assert(which.isImm() && "Shuffle operand not a constant");
-
- int src=which.getImm();
- int elem=src%numcopies;
-
- if (which.getImm() < numcopies)
- copy.addReg(src1[elem]);
- else
- copy.addReg(src2[elem]);
- copies.push_back(copy);
- }
-}
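
The index decoding above (src % numcopies plus the comparison against numcopies) means: indices below the lane count select a lane of the first source, and the remaining indices select the same lane of the second source. A self-contained sketch of that rule, with plain int arrays standing in for the scalar registers (an assumption made purely for illustration):

#include <cassert>
#include <cstdio>

// For an N-lane shuffle, indices 0..N-1 pick lanes of Src1 and indices
// N..2N-1 pick lanes of Src2; Idx % N is the lane within that source.
static int shuffleLane(const int *Src1, const int *Src2, unsigned N,
                       unsigned Idx) {
  assert(Idx < 2 * N && "shuffle index out of range");
  unsigned Lane = Idx % N;
  return Idx < N ? Src1[Lane] : Src2[Lane];
}

int main() {
  int B[2] = {10, 11};  // vb = <b1, b2>
  int C[2] = {20, 21};  // vc = <c1, c2>
  // The example from the comment block: i1=3, i2=0 gives a1 <= c2, a2 <= b1.
  std::printf("a1 = %d\n", shuffleLane(B, C, 2, 3));  // 21, i.e. c2
  std::printf("a2 = %d\n", shuffleLane(B, C, 2, 0));  // 10, i.e. b1
  return 0;
}
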
-
-///=============================================================================
-///a <= extractv2 va, <i1>
-///gets turned into a simple move to the scalar register a. The source depends
-///on i1.
-///=============================================================================
-void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which = Instr->getOperand(2);
- assert(which.isImm() && "Extract operand not a constant");
-
- DebugLoc DL = Instr->getDebugLoc();
- copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- Instr->getOperand(0).getReg())
- .addReg(src[which.getImm()]));
-}
-
-///=============================================================================
-///va <= vecinsertv2 vb, c, <i1>
-///This instruction copies all elements of vb to va, except the 'i1'th element.
-///The scalar value c becomes the 'i1'th element of va.
-///This gets translated to 2 (4 for vecinsertv4) moves.
-///=============================================================================
-void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which=Instr->getOperand(3);
- assert(which.isImm() && "Insert operand not a constant");
- unsigned int elem=which.getImm();
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstrBuilder copy =
- BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- if (i != elem)
- copy.addReg(src[i]);
- else
- copy.addOperand(Instr->getOperand(2));
-
- copies.push_back(copy);
- }
-
-}
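
At the value level, the vecinsert described above is simply: copy the source vector, then overwrite the selected lane with the scalar. A short sketch with std::array values in place of registers (an illustrative assumption only):

#include <array>
#include <cstdio>

// Copy every lane of Src, except lane Elem, which takes Scalar instead.
template <unsigned N>
static std::array<int, N> insertLane(const std::array<int, N> &Src,
                                     int Scalar, unsigned Elem) {
  std::array<int, N> Dst = Src;
  Dst[Elem] = Scalar;
  return Dst;
}

int main() {
  std::array<int, 2> VB = {7, 8};
  std::array<int, 2> VA = insertLane<2>(VB, 99, 1);  // va = <7, 99>
  std::printf("%d %d\n", VA[0], VA[1]);
  return 0;
}
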
-
-///=============================================================================
-///va <= buildv2 b1, b2
-///gets translated to
-///a1 <= b1
-///a2 <= b2
-///=============================================================================
-void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++)
- copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- dest[i])
- .addOperand(Instr->getOperand(1+i)));
-}
-
-///=============================================================================
-///For a tex inst of the form
-///va <= op [scalar operands]
-///the following multi output instruction is created :
-///[v1, v2] <= op' [scalar operands]
-///=============================================================================
-void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, which should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i], RegState::Define);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///Look at the vector instruction type and dispatch to the createVec<...>
-///function that creates the scalar copies.
-///=============================================================================
-void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- if (ISVECLOAD(Instr)) {
- createLoadCopy(F, Instr, copies);
- return;
- }
- if (ISVECSTORE(Instr)) {
- createStoreCopy(F, Instr, copies);
- return;
- }
- if (ISVECSHUFFLE(Instr)) {
- createVecShuffle(F, Instr, copies);
- return;
- }
- if (ISVECEXTRACT(Instr)) {
- createVecExtract(F, Instr, copies);
- return;
- }
- if (ISVECINSERT(Instr)) {
- createVecInsert(F, Instr, copies);
- return;
- }
- if (ISVECDEST(Instr)) {
- createVecDest(F, Instr, copies);
- return;
- }
- if (ISVECBUILD(Instr)) {
- createVecBuild(F, Instr, copies);
- return;
- }
-
- unsigned numcopies=numCopiesNeeded(Instr);
-
- for (unsigned i=0; i<numcopies; ++i)
- copies.push_back(F.CloneMachineInstr(Instr));
-
- for (unsigned i=0; i<numcopies; ++i) {
- MachineInstrBuilder copy(F, copies[i]);
-
- std::vector<MachineOperand> allOperands;
- std::vector<bool> isDef;
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
- MachineOperand oper = copy->getOperand(j);
- allOperands.push_back(oper);
- if (oper.isReg())
- isDef.push_back(oper.isDef());
- else
- isDef.push_back(false);
- }
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
- copy->RemoveOperand(0);
-
- copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
-
- for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
- MachineOperand oper=allOperands[j];
- if (oper.isReg()) {
- unsigned regnum = oper.getReg();
- if (isVectorRegister(regnum)) {
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy.addReg(scalarRegs[i], getDefRegState(isDef[j]));
- }
- else
- copy.addOperand(oper);
- }
- else
- copy.addOperand(oper);
- }
- }
-}
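
This fallback path is plain lane-wise scalarization: the instruction is cloned once per lane, and copy i substitutes lane i's scalar register wherever a vector register appears while leaving other operands untouched. A standalone model of the same idea at the value level, with std::array standing in for vector registers and a callable standing in for the scalar opcode (assumptions made only for the example):

#include <array>
#include <cstdio>

// An N-lane vector operation becomes N scalar operations; copy I reads
// and writes lane I of every vector operand.
template <unsigned N, typename ScalarOp>
static std::array<int, N> elementwise(const std::array<int, N> &A,
                                      const std::array<int, N> &B,
                                      ScalarOp Op) {
  std::array<int, N> R{};
  for (unsigned I = 0; I != N; ++I)
    R[I] = Op(A[I], B[I]);  // one scalar copy per lane
  return R;
}

int main() {
  std::array<int, 4> A = {1, 2, 3, 4};
  std::array<int, 4> B = {10, 20, 30, 40};
  auto R = elementwise<4>(A, B, [](int X, int Y) { return X + Y; });
  for (int V : R)
    std::printf("%d ", V);  // prints: 11 22 33 44
  std::printf("\n");
  return 0;
}
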
-
-///=============================================================================
-///Scan through all basic blocks, looking for vector instructions.
-///For each vector instruction I, insert the scalar copies before I, and
-///add I to the toRemove vector. Finally, remove all instructions in toRemove.
-///=============================================================================
-void VectorElementize::elementize(MachineFunction &F) {
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
- BI!=BE; ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- std::vector<MachineInstr *> copies;
- std::vector<MachineInstr *> toRemove;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
- II!=IE; ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isVectorInstr(Instr))
- continue;
-
- copies.clear();
- createCopies(F, Instr, copies);
- for (unsigned i=0, e=copies.size(); i!=e; ++i)
- BB->insert(II, copies[i]);
-
- assert((copies.size() > 0) && "Problem in createCopies");
- toRemove.push_back(Instr);
- }
- for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
- F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
- }
-}
-
-///=============================================================================
-///a <= b
-///...
-///...
-///x <= op(a, ...)
-///gets converted to
-///
-///x <= op(b, ...)
-///The original move is still present. This works on SSA form machine code.
-///Note that a <= b should be a simple vreg-to-vreg move instruction.
-///TBD: I didn't find a function that can do replaceOperand, so I remove
-///all operands and add all of them again, replacing the one while adding.
-///=============================================================================
-unsigned VectorElementize::copyProp(MachineFunction &F) {
- unsigned numReplacements = 0;
-
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- // Don't do copy propagation on PHI as it will cause unnecessary
- // live range overlap.
- if ((Instr->getOpcode() == TargetOpcode::PHI) ||
- (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
- continue;
-
- bool needsReplacement = false;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- if (!oper.isReg()) continue;
- if (oper.isDef()) continue;
- if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
-
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
-
- if (!defInstr) continue;
-
- if (!isSimpleMove(defInstr)) continue;
-
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!defSrc.isReg()) continue;
- if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
-
- needsReplacement = true;
-
- }
- if (!needsReplacement) continue;
-
- numReplacements++;
-
- std::vector<MachineOperand> operands;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- bool flag = false;
- do {
- if (!(oper.isReg()))
- break;
- if (oper.isDef())
- break;
- if (!(RegInfo->isVirtualRegister(oper.getReg())))
- break;
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
- if (!(isSimpleMove(defInstr)))
- break;
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!(defSrc.isReg()))
- break;
- if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
- break;
- operands.push_back(defSrc);
- flag = true;
- } while (0);
- if (flag == false)
- operands.push_back(oper);
- }
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
- Instr->RemoveOperand(0);
- for (unsigned i=0, e=operands.size(); i!=e; ++i)
- Instr->addOperand(F, operands[i]);
-
- }
- }
- return numReplacements;
-}
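
The replacement step above amounts to rebuilding the operand list while forwarding any use that is defined by a simple vreg-to-vreg move to that move's source. A simplified standalone sketch of the forwarding idea, with strings for virtual registers; the def-operand, PHI, and virtual-register checks done by the pass are deliberately omitted (assumptions for illustration only):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Moves maps the destination of each simple copy to its source; any
// operand found in the map is replaced by that source, everything else
// is re-added unchanged. The copy itself stays and is cleaned up later
// if it becomes dead.
static std::vector<std::string>
propagateCopies(const std::vector<std::string> &Operands,
                const std::map<std::string, std::string> &Moves) {
  std::vector<std::string> NewOps;
  for (const std::string &Op : Operands) {
    auto It = Moves.find(Op);
    NewOps.push_back(It != Moves.end() ? It->second : Op);
  }
  return NewOps;
}

int main() {
  // a <= b followed by x <= op(a, c) becomes x <= op(b, c).
  std::map<std::string, std::string> Moves = {{"a", "b"}};
  std::vector<std::string> Ops = {"a", "c"};
  for (const std::string &Op : propagateCopies(Ops, Moves))
    std::printf("%s ", Op.c_str());  // prints: b c
  std::printf("\n");
  return 0;
}
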
-
-///=============================================================================
-///Look for simple vreg-to-vreg moves whose dest register satisfies use_empty(),
-///add them to the deadMoves vector, then remove all instructions in deadMoves.
-///=============================================================================
-unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
- std::vector<MachineInstr *> deadMoves;
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isSimpleMove(Instr)) continue;
-
- MachineOperand dest = Instr->getOperand(0);
- assert(dest.isReg() && "dest of move not a register");
- assert(RegInfo->isVirtualRegister(dest.getReg()) &&
- "dest of move not a virtual register");
-
- if (MRI->use_empty(dest.getReg())) {
- deadMoves.push_back(Instr);
- }
- }
- }
-
- for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
- F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
-
- return deadMoves.size();
-}
-
-///=============================================================================
-///Main function for this pass.
-///=============================================================================
-bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
- MRI = &F.getRegInfo();
-
- RegInfo = TM.getRegisterInfo();
- InstrInfo = TM.getInstrInfo();
-
- VectorToScalarMap.clear();
-
- elementize(F);
-
- if (RemoveRedundantMoves)
- while (1) {
- if (copyProp(F) == 0) break;
- removeDeadMoves(F);
- }
-
- return true;
-}
-
-FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
- return new VectorElementize(tm);
-}
-
-unsigned VectorElementize::getScalarVersion(unsigned opcode) {
- if (opcode == NVPTX::PHI)
- return opcode;
- if (opcode == NVPTX::IMPLICIT_DEF)
- return opcode;
- switch(opcode) {
- default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
- case TargetOpcode::COPY: return TargetOpcode::COPY;
- case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
- case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
- case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
- case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
- case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
- case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
- case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
- case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
- case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
- case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
- case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
- case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
- case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
- case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
- case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
- case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
- case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
- case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
- case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
- case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
- case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
- case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
- case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
- case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
- case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
- case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
- case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
- case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
- case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
- case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
- case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
- case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
- case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
- case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
- case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
- case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
- case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
- case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
- case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
- case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
- case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
- case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
- case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
- case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
- case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
- case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
- case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
- case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
- case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
- case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
- case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
- case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
- case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
- case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
- case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
- case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
- case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
- case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
- case NVPTX::VNegV2I16: return NVPTX::INEG16;
- case NVPTX::VNegV2I32: return NVPTX::INEG32;
- case NVPTX::VNegV2I64: return NVPTX::INEG64;
- case NVPTX::VNegV2I8: return NVPTX::INEG8;
- case NVPTX::VNegV4I16: return NVPTX::INEG16;
- case NVPTX::VNegV4I32: return NVPTX::INEG32;
- case NVPTX::VNegV4I8: return NVPTX::INEG8;
- case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
- case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNotV2I16: return NVPTX::NOT16;
- case NVPTX::VNotV2I32: return NVPTX::NOT32;
- case NVPTX::VNotV2I64: return NVPTX::NOT64;
- case NVPTX::VNotV2I8: return NVPTX::NOT8;
- case NVPTX::VNotV4I16: return NVPTX::NOT16;
- case NVPTX::VNotV4I32: return NVPTX::NOT32;
- case NVPTX::VNotV4I8: return NVPTX::NOT8;
- case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
- case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
- case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
- case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
- case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
- case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
- case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
- case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
- case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
- case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
- case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
- case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
- case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
- case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
- case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
- case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
- case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
- case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
- case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
- case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
- case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
- case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
- case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
- case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
- case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
- case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
- case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
- case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
- case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
- case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
- case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
- case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
- case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
- case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
- case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
- case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
-
- case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
- case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
- case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
- case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
- case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
- case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
- case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
- case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
- case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
- case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
- case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
- case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
- case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
- case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
- case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
- case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
- case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
- case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
- case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
- case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
- case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
- case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
- case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
- case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
- case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
- case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
- case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
- case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
- case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
- case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
- case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
- case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
- case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
- case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
- case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
- case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
-
- case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
- case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
- case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
- case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
- case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
- case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
- case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
- case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
-
- case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
- case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
- case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
- case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
- case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
- case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
- case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
- case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
-
- case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
- case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
- case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
- case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
- case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
- case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
- case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
- case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
-
- case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
- case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
- case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
- case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
- case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
- case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
- case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
- case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
-
- case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
- case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
- case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
- case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
- case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
- case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
- case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
- case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
-
- case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
- case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
- case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
- case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
- case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
- case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
- case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
- case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
-
- case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
- case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
- case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
- case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
- case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
- case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
- case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
- case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
-
- case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
- case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
- case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
- case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
- case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
- case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
- case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
- case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
-
- case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
- case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
- case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
- case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
- case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
- case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
- case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
- case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
-
- case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
- case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
- case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
- case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
- case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
- case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
- case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
- case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
- }
- return 0;
-}
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
deleted file mode 100644
index ed06668..0000000
--- a/lib/Target/NVPTX/gen-register-defs.py
+++ /dev/null
@@ -1,202 +0,0 @@
-#!/usr/bin/env python
-
-num_regs = 396
-
-outFile = open('NVPTXRegisterInfo.td', 'w')
-
-outFile.write('''
-//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class NVPTXReg<string n> : Register<n> {
- let Namespace = "NVPTX";
-}
-
-class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
- : RegisterClass <"NVPTX", regTypes, alignment, regList>;
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
-
-// Special Registers used as the stack
-def VRDepot : NVPTXReg<"%Depot">;
-''')
-
-# Predicates
-outFile.write('''
-//===--- Predicate --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
-
-# Int8
-outFile.write('''
-//===--- 8-bit ------------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
-
-# Int16
-outFile.write('''
-//===--- 16-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
-
-# Int32
-outFile.write('''
-//===--- 32-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
-
-# Int64
-outFile.write('''
-//===--- 64-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
-
-# F32
-outFile.write('''
-//===--- 32-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
-
-# F64
-outFile.write('''
-//===--- 64-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
-
-# Vector registers
-outFile.write('''
-//===--- Vector -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
-
-for i in range(0, num_regs):
- outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
-
-# Argument registers
-outFile.write('''
-//===--- Arguments --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
-
-outFile.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('''
-// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 192d18d..6036428 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
+ PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index d61e741..61868d4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -151,7 +151,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_TOC;
break;
case PPC::fixup_ppc_toc16:
- Type = ELF::R_PPC64_TOC16;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ Type = ELF::R_PPC64_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
+ }
break;
case PPC::fixup_ppc_toc16_ds:
switch (Modifier) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 215aa40..a25d7fe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -17,8 +17,9 @@ using namespace llvm;
void PPCMCAsmInfoDarwin::anchor() { }
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
PCSymbol = ".";
@@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
void PPCLinuxMCAsmInfo::anchor() { }
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
- if (is64Bit)
- PointerSize = 8;
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
IsLittleEndian = false;
// ".comm align is in bytes but .align is pow-2."
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index e6d38eb..f71979f 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,6 +25,7 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
+ class ImmutablePass;
class JITCodeEmitter;
class MachineInstr;
class AsmPrinter;
@@ -37,6 +38,9 @@ namespace llvm {
JITCodeEmitter &MCE);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates an PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
namespace PPCII {
@@ -53,26 +57,32 @@ namespace llvm {
/// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
/// the function's picbase, e.g. lo16(symbol-picbase).
- MO_PIC_FLAG = 4,
+ MO_PIC_FLAG = 2,
/// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
/// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
- MO_NLP_FLAG = 8,
+ MO_NLP_FLAG = 4,
/// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
/// symbol with hidden visibility. This causes a different kind of
/// non-lazy-pointer to be generated.
- MO_NLP_HIDDEN_FLAG = 16,
+ MO_NLP_HIDDEN_FLAG = 8,
/// The next are not flags but distinct values.
- MO_ACCESS_MASK = 0xe0,
+ MO_ACCESS_MASK = 0xf0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 1 << 5,
- MO_HA16 = 2 << 5,
+ MO_LO16 = 1 << 4,
+ MO_HA16 = 2 << 4,
+
+ MO_TPREL16_HA = 3 << 4,
+ MO_TPREL16_LO = 4 << 4,
- MO_TPREL16_HA = 3 << 5,
- MO_TPREL16_LO = 4 << 5
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL16_LO = 5 << 4,
+ MO_TLSLD16_LO = 6 << 4,
+ MO_TOC16_LO = 7 << 4
};
} // end namespace PPCII
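
In the new layout above, the boolean operand flags occupy the low bits and the access kind occupies bits 4-7, so the mask moves from 0xe0 to 0xf0 and every access value becomes a multiple of 1 << 4. A small compile-time check of that packing, modeling only the values visible in this hunk under shortened, hypothetical names (not the actual PPCII enum, which has further members):

#include <cstdint>

// Values copied from the hunk above; names shortened for illustration.
enum : uint8_t {
  PIC_FLAG        = 2,     // bit 1
  NLP_FLAG        = 4,     // bit 2
  NLP_HIDDEN_FLAG = 8,     // bit 3
  ACCESS_MASK     = 0xf0,  // bits 4-7 select one access kind below
  LO16        = 1 << 4,
  HA16        = 2 << 4,
  TPREL16_HA  = 3 << 4,
  TPREL16_LO  = 4 << 4,
  DTPREL16_LO = 5 << 4,
  TLSLD16_LO  = 6 << 4,
  TOC16_LO    = 7 << 4
};

// The flag bits and the access field must stay disjoint, and every
// access kind must fit inside the widened mask.
static_assert((ACCESS_MASK & (PIC_FLAG | NLP_FLAG | NLP_HIDDEN_FLAG)) == 0,
              "access field collides with the flag bits");
static_assert((TOC16_LO & ~unsigned(ACCESS_MASK)) == 0,
              "an access kind does not fit inside ACCESS_MASK");

int main() { return 0; }
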
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index cb15dad..9929136 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E500mc", "">;
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
"PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
@@ -58,6 +63,25 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
+// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz
+// FRE p5 through p7 fre (vs. fres, available since p3)
+// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
+// LDBRX p7 load with byte reversal
+// LFIWAX p6, p6x, p7 lfiwax
+// LFIWZX p7 lfiwzx
+// POPCNTB p5 through p7 popcntb and related instructions
+// POPCNTD p7 popcntd and related instructions
+// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
+// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
// Register File Description
@@ -109,10 +133,30 @@ def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureSTFIWX, FeatureISEL,
Feature64Bit
/*, Feature64BitRegs */]>;
+def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureSTFIWX, FeatureISEL,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureQPX]>;
+def : Processor<"pwr3", G5Itineraries,
+ [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr4", G5Itineraries,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr5", G5Itineraries,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+def : Processor<"pwr5x", G5Itineraries,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"pwr6x", G5Itineraries,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index adb673b..eae9b7b 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -464,12 +464,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// associated TOC entry. Otherwise reference the symbol directly.
TmpInst.setOpcode(PPC::LDrs);
const MachineOperand &MO = MI->getOperand(1);
- assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!");
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
MCSymbol *MOSymbol = 0;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else {
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
@@ -732,14 +735,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8 /*size*/);
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
MCSymbolRefExpr::VK_PPC_TOC, OutContext),
- 8/*size*/, 0/*addrspace*/);
+ 8/*size*/);
// Emit a null environment pointer.
- OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */);
+ OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
@@ -768,6 +771,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+        OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+                                                      OutContext),
+                              isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
return AsmPrinter::doFinalization(M);
}
@@ -802,7 +824,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"ppcA2",
"ppce500mc",
"ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
"power6",
+ "power6x",
"power7",
"ppc64"
};
@@ -817,8 +844,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
// FIXME: This is a total hack, finish mc'izing the PPC backend.
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
+ assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+ }
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -1031,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
else
// Internal to current translation unit.
//
@@ -1041,7 +1071,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -1060,7 +1090,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
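
A note on the new bounds check: the assert added in EmitStartOfAsmFile guards
against CPUDirectives[] falling out of sync with the PPC::DIR_* enumerators as
CPUs such as power6x are added. A minimal standalone sketch of the same
table-sync idea, with hypothetical names and a compile-time check in place of
LLVM's runtime assert:

// Illustrative sketch only; the enum and table are stand-ins for
// PPC::DIR_* and CPUDirectives[].
#include <cstdio>

enum Directive { DIR_PWR5X, DIR_PWR6, DIR_PWR6X, DIR_PWR7, DIR_64, DIR_COUNT };

static const char *const DirectiveNames[] = {
  "power5x", "power6", "power6x", "power7", "ppc64",
};

// Refuses to compile if an enumerator is added without a matching string.
static_assert(sizeof(DirectiveNames) / sizeof(*DirectiveNames) == DIR_COUNT,
              "DirectiveNames[] might not be up-to-date!");

int main() {
  std::printf("\t.machine %s\n", DirectiveNames[DIR_PWR6X]);
}
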
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 9911575..bd1c378 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -28,10 +28,16 @@ using namespace llvm;
STATISTIC(NumExpanded, "Number of branches expanded to long format");
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -45,6 +51,9 @@ namespace {
char PPCBSel::ID = 0;
}
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
/// Pass
///
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index a74932c..b98cc48 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -54,6 +54,10 @@ using namespace llvm;
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+}
+
namespace {
class CountValue;
struct PPCCTRLoops : public MachineFunctionPass {
@@ -64,7 +68,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- PPCCTRLoops() : MachineFunctionPass(ID) {}
+ PPCCTRLoops() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -174,6 +180,12 @@ namespace {
};
} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
/// isCompareEquals - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
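
Both PPCBSel and PPCCTRLoops now call their initialize*Pass() hook from the
constructor, so registration must be safe to request from every constructed
pass object yet run only once. A standalone sketch of that once-only pattern
using std::call_once and hypothetical names (LLVM itself goes through the
INITIALIZE_PASS / PassRegistry machinery):

// Illustrative sketch only, not LLVM's pass registration code.
#include <iostream>
#include <mutex>

static std::once_flag RegisteredOnce;

static void registerCTRLoopsPass() {
  // Imagine this adds the pass to a global registry.
  std::cout << "ppc-ctr-loops registered\n";
}

struct PPCCTRLoopsSketch {
  PPCCTRLoopsSketch() {
    // Called from every constructor; the body above runs exactly once.
    std::call_once(RegisteredOnce, registerCTRLoopsPass);
  }
};

int main() {
  PPCCTRLoopsSketch A, B, C;  // "registered" is printed a single time
}
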
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 3f87e88..caeb179 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
- CCIfType<[f32], CCAssignToReg<[F1]>>,
- CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
// Vector types are always returned in V2.
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
@@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[
//===----------------------------------------------------------------------===//
-// PowerPC Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-/*
-def CC_PPC : CallingConv<[
- // The first 8 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
- CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
-
- // Common sub-targets passes FP values in F1 - F13
- CCIfType<[f32, f64],
- CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
-
- // The first 12 Vector arguments are passed in altivec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
-
-/*
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>*/
-]>;
-
-*/
-
-//===----------------------------------------------------------------------===//
-// PowerPC System V Release 4 ABI
+// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
-def CC_PPC_SVR4_Common : CallingConv<[
+def CC_PPC32_SVR4_Common : CallingConv<[
// The ABI requires i64 to be passed in two adjacent registers with the first
// register having an odd register number.
- CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
// The first 8 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
// Make sure the i64 words from a long double are either both passed in
// registers or both passed on the stack.
- CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
// FP values are passed in F1 - F8.
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
@@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
-def CC_PPC_SVR4_VarArg : CallingConv<[
- CCDelegateTo<CC_PPC_SVR4_Common>
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
-// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
-// vector arguments in vector registers before putting them on the stack.
-def CC_PPC_SVR4 : CallingConv<[
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
// The first 12 Vector arguments are passed in AltiVec registers.
CCIfType<[v16i8, v8i16, v4i32, v4f32],
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCDelegateTo<CC_PPC_SVR4_Common>
+ CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// Helper "calling convention" to handle aggregate by value arguments.
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[
// Still, the address of the aggregate copy in the callers stack frame is passed
// in a GPR (or in the parameter list area if all GPRs are allocated) from the
// caller to the callee. The location for the address argument is assigned by
-// the CC_PPC_SVR4 calling convention.
+// the CC_PPC32_SVR4 calling convention.
//
-// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
-def CC_PPC_SVR4_ByVal : CallingConv<[
+def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
- CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
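
CC_PPC32_SVR4_Custom_AlignArgRegs implements the rule stated above: a split
i64 must occupy two adjacent GPRs whose first register is odd-numbered (r3,
r5, r7 or r9). A standalone sketch of that allocation rule, using a plain
index in place of LLVM's CCState:

// Illustrative sketch only: models the odd-register alignment for split i64s.
#include <cstdio>

static const int ArgRegs[] = {3, 4, 5, 6, 7, 8, 9, 10};  // r3..r10
static const int NumArgRegs = 8;

// NextFree is the index of the first unallocated GPR; returns the index the
// i64's first half should use, skipping one register when needed.
static int alignForI64(int NextFree) {
  if (NextFree != NumArgRegs && NextFree % 2 == 1)
    ++NextFree;  // burn r4/r6/r8/r10 so the pair starts at an odd register
  return NextFree;
}

int main() {
  // After one i32 argument (in r3), an i64 lands in r5:r6, not r4:r5.
  int Idx = alignForI64(1);
  if (Idx + 1 < NumArgRegs)
    std::printf("i64 goes in r%d:r%d\n", ArgRegs[Idx], ArgRegs[Idx + 1]);
}
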
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 5901f36..0a396e6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -119,12 +119,21 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
}
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = getPPCRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = getPPCRegisterNumbering(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
}
// If no registers are used, turn this into a copy.
@@ -198,13 +207,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
// to adjust the stack pointer (we fit in the Red Zone). For 64-bit
// SVR4, we also require a stack frame if we need to spill the CR,
// since this spill area is addressed relative to the stack pointer.
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can
- // still generate stackless code if all local vars are reg-allocated.
- // Try: (FrameSize <= 224
- // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI()))
if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -777,7 +787,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
// Save R31 if necessary
int FPSI = FI->getFramePointerSaveIndex();
@@ -802,6 +813,16 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
+
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
// a large stack, which will require scavenging a register to materialize a
// large offset.
@@ -1115,6 +1136,47 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
.addReg(MoveReg));
}
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
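
The eliminateCallFramePseudoInstr body moved here from PPCRegisterInfo splits
a callee-popped amount that does not fit a signed 16-bit addi immediate into
a lis/ori pair. A standalone sketch verifying that (CalleeAmt >> 16) and
(CalleeAmt & 0xFFFF) reconstruct the original value:

// Illustrative sketch only: mirrors the immediate-splitting arithmetic above.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Same condition as LLVM's isInt<16>(): fits a signed 16-bit immediate.
static bool fitsInSigned16(int32_t V) { return V >= -32768 && V <= 32767; }

static int32_t materialize(int32_t CalleeAmt) {
  if (fitsInSigned16(CalleeAmt))
    return CalleeAmt;                         // addi r1, r1, CalleeAmt
  int32_t Hi = CalleeAmt >> 16;               // lis  tmp, Hi
  int32_t Lo = CalleeAmt & 0xFFFF;            // ori  tmp, tmp, Lo
  uint32_t Tmp = static_cast<uint32_t>(Hi) << 16;
  Tmp |= static_cast<uint32_t>(Lo);
  return static_cast<int32_t>(Tmp);           // add  r1, r1, tmp
}

int main() {
  for (int32_t V : {32767, -32768, 70000, -70000, 1 << 20}) {
    assert(materialize(V) == V);
    std::printf("%d materialized correctly\n", V);
  }
}
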
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 3517d8c..d09e47f 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -27,7 +27,8 @@ class PPCFrameLowering: public TargetFrameLowering {
public:
PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
Subtarget(sti) {
}
@@ -50,6 +51,10 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 762b346..17bea8a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -23,9 +23,9 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -34,6 +34,10 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
namespace {
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
@@ -48,7 +52,9 @@ namespace {
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {}
+ PPCSubTarget(*TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
@@ -61,6 +67,8 @@ namespace {
return true;
}
+ virtual void PostprocessISelDAG();
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -1273,16 +1281,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case PPCISD::TOC_ENTRY: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
- // For medium code model, we generate two instructions as described
- // below. Otherwise we allow SelectCodeCommon to handle this, selecting
- // one of LDtoc, LDtocJTI, and LDtocCPT.
- if (TM.getCodeModel() != CodeModel::Medium)
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
// The first source operand is a TargetGlobalAddress or a
// TargetJumpTable. If it is an externally defined symbol, a symbol
// with common linkage, a function address, or a jump table address,
- // we generate:
+ // or if we are generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1291,7 +1300,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
TOCbase, GA);
- if (isa<JumpTableSDNode>(GA))
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
@@ -1316,11 +1325,231 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
}
return SelectCode(N);
}
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI8L:
+ case PPC::ADDIL:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL16_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD16_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC16_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
/// createPPCISelDag - This pass converts a legalized DAG into a
@@ -1330,3 +1559,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
return new PPCDAGToDAGISel(TM);
}
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
+ false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
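
The PostprocessISelDAG peephole above folds an addi immediate into ld, std or
lwa only when the immediate is a multiple of 4, because those are DS-form
instructions whose 16-bit displacement field encodes offset>>2. A standalone
sketch of that legality test (the explicit range check is an extra guard here;
in the pass the addi immediate is already known to fit):

// Illustrative sketch only: the DS-form constraint behind the fold above.
#include <cstdint>
#include <cstdio>

static bool canFoldAddImmediate(bool IsDSForm, int64_t Imm) {
  if (Imm < -32768 || Imm > 32767)  // must fit the 16-bit displacement field
    return false;
  if (IsDSForm && (Imm % 4) != 0)   // ld/std/lwa need a 4-byte-aligned offset
    return false;
  return true;
}

int main() {
  std::printf("lwz +6 -> %d\n", canFoldAddImmediate(false, 6));  // ok: D-form
  std::printf("ld  +6 -> %d\n", canFoldAddImmediate(true, 6));   // rejected
  std::printf("ld  +8 -> %d\n", canFoldAddImmediate(true, 8));   // ok
}
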
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9966b2c..cf1f459 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -36,20 +36,20 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Legal);
@@ -498,15 +500,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
- maxStoresPerMemset = 32;
- maxStoresPerMemsetOptSize = 16;
- maxStoresPerMemcpy = 32;
- maxStoresPerMemcpyOptSize = 8;
- maxStoresPerMemmove = 32;
- maxStoresPerMemmoveOptSize = 8;
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
setPrefFunctionAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
}
}
@@ -592,6 +594,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
}
}
@@ -1746,18 +1749,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1780,11 +1783,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1907,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
- CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1968,7 +1971,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
@@ -2160,13 +2163,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -2501,6 +2507,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
+ // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
+ // When passing anonymous aggregates, this is currently not true.
+ // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
@@ -3323,7 +3332,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
- // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops =
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
@@ -3339,17 +3348,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// Emit tail call.
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
@@ -3493,11 +3491,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
bool Result;
if (Outs[i].IsFixed) {
- Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
- CCInfo);
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
} else {
- Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
- ArgFlags, CCInfo);
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
}
if (Result) {
@@ -3510,7 +3508,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
} else {
// All arguments are treated the same.
- CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
// Assign locations to all of the outgoing aggregate by value arguments.
@@ -3521,7 +3519,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
- CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
@@ -4415,14 +4413,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -4447,12 +4439,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -5028,11 +5025,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = Op.getValueType();
+ int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
+ SDValue EltSize = DAG.getConstant(Size, MVT::i32);
+ return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5128,23 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
}
- // Three instruction sequences.
-
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
- }
-
return SDValue();
}
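
The rewritten BUILD_VECTOR lowering hands the whole range [-32,31] to the
VADD_SPLAT pseudo so constant folding cannot undo the expansion, and the
operand choices guarantee every emitted vspltis immediate stays in the 5-bit
signed range. A standalone check of that claim:

// Illustrative sketch only: verifies the range reasoning behind VADD_SPLAT.
#include <cassert>
#include <cstdio>

static bool isSplatImm(int V) { return V >= -16 && V <= 15; }

int main() {
  for (int Val = -32; Val <= 31; ++Val) {
    if (isSplatImm(Val))
      continue;                        // handled by a single vspltis
    if ((Val & 1) == 0)
      assert(isSplatImm(Val >> 1));    // vspltis(Val/2); vaddu*m tmp, tmp
    else if (Val > 0)
      assert(isSplatImm(Val - 16));    // vspltis(Val-16) - vspltis(-16)
    else
      assert(isSplatImm(Val + 16));    // vspltis(Val+16) + vspltis(-16)
  }
  std::printf("all of [-32,31] is covered by the VADD_SPLAT expansions\n");
}
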
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 12b3df7..f5d418c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -237,6 +237,12 @@ namespace llvm {
/// sym@got@dtprel@l.
ADDI_DTPREL_L,
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -252,13 +258,14 @@ namespace llvm {
/// or i32.
LBRX,
- /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces
- /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha.
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym@toc@ha.
ADDIS_TOC_HA,
- /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a
- /// LD instruction with base register G8RReg and offset sym@toc@l.
- /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
LD_TOC_L,
/// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 1dd5415..0120130 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -701,7 +701,7 @@ def : Pat<(PPCload ixaddr:$src),
def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
-// Support for medium code model.
+// Support for medium and large code model.
def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
"#ADDIStocHA",
[(set G8RC:$rD,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 8c077b7..460e943 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -181,7 +181,7 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
-// Instructions to support medium code model
+// Instructions to support medium and large code model
def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
@@ -346,7 +346,7 @@ def crbitm: Operand<i8> {
// Address operands
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
@@ -355,7 +355,7 @@ def memrr : Operand<iPTR> {
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
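
PPCaddisTocHA and PPCldTocL combine into a full 32-bit TOC offset: addis adds
the high-adjusted half, and the D/DS-form displacement supplies the
sign-extended low half. A standalone sketch of that @ha/@l arithmetic
(simplified; not the exact relocation formulas):

// Illustrative sketch only: the +0x8000 bias in ha() compensates for the
// sign extension of lo() when the two halves are recombined.
#include <cassert>
#include <cstdint>
#include <cstdio>

static int32_t ha(int32_t X) {
  return static_cast<int32_t>((static_cast<int64_t>(X) + 0x8000) >> 16);
}
static int16_t lo(int32_t X) { return static_cast<int16_t>(X); }

int main() {
  for (int32_t X : {0, 4, 0x7FFC, 0x8000, 0x12345678, -4, -0x8000, -0x12345678}) {
    // addis computes base + (ha(X) << 16); the memory op adds the signed lo(X).
    int64_t Rebuilt = static_cast<int64_t>(ha(X)) * 65536 + lo(X);
    assert(Rebuilt == X);
  }
  std::printf("@ha/@l reconstruction verified\n");
}
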
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 851de17..cfcd749 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -115,7 +115,7 @@ asm(
"lwz r2, 208(r1)\n" // stub's frame
"lwz r4, 8(r2)\n" // stub's lr
"li r5, 0\n" // 0 == 32 bit
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
"mtctr r3\n"
// Restore all int arg registers
"lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
@@ -178,7 +178,7 @@ asm(
"lwz 5, 104(1)\n" // stub's frame
"lwz 4, 4(5)\n" // stub's lr
"li 5, 0\n" // 0 == 32 bit
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"mtctr 3\n"
// Restore all int arg registers
"lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
@@ -259,10 +259,10 @@ asm(
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
#ifdef __ELF__
- "bl PPCCompilationCallbackC\n"
+ "bl LLVMPPCCompilationCallback\n"
"nop\n"
#else
- "bl _PPCCompilationCallbackC\n"
+ "bl _LLVMPPCCompilationCallback\n"
#endif
"mtctr 3\n"
// Restore all int arg registers
@@ -292,9 +292,10 @@ void PPC64CompilationCallback() {
#endif
extern "C" {
-static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 73f7a2c..9b0df3e 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
+ case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
+ break;
+ case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
+ break;
+ case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
+ break;
}
// FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 24caffa..045b375 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -71,6 +71,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// register for parameter passing.
unsigned VarArgsNumFPR;
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -83,7 +86,8 @@ public:
VarArgsFrameIndex(0),
VarArgsStackOffset(0),
VarArgsNumGPR(0),
- VarArgsNumFPR(0) {}
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -125,6 +129,9 @@ public:
unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
};
} // end of namespace llvm
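
Moving CRSpillFrameIndex into PPCFunctionInfo (and out of PPCRegisterInfo, as
the later hunks show) keeps per-function scratch state on an object whose
lifetime matches the function being compiled, rather than a mutable field on
a shared target object. A standalone sketch of that ownership split, with
hypothetical names:

// Illustrative sketch only: per-function state lives in a function-info
// object, not in the long-lived register-info object shared across functions.
#include <cstdio>

struct FunctionInfoSketch {
  // Reset naturally for every function, because a fresh object is created.
  int CRSpillFrameIndex = 0;
};

struct RegisterInfoSketch {
  // No mutable per-function fields; queries take the function's own info.
  int crSpillSlot(const FunctionInfoSketch &FI) const {
    return FI.CRSpillFrameIndex;
  }
};

int main() {
  RegisterInfoSketch RegInfo;       // one instance for the whole target
  FunctionInfoSketch FuncA, FuncB;  // one instance per compiled function
  FuncA.CRSpillFrameIndex = -4;
  std::printf("A: %d, B: %d\n", RegInfo.crSpillSlot(FuncA),
              RegInfo.crSpillSlot(FuncB));
}
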
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 378c147..df245cc 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -71,7 +71,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii), CRSpillFrameIdx(0) {
+ Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -111,11 +111,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
CSR_Darwin32_SaveList;
- // For 32-bit SVR4, also initialize the frame index associated with
- // the CR spill slot.
- if (!Subtarget.isPPC64())
- CRSpillFrameIdx = 0;
-
return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
}
@@ -222,45 +217,6 @@ PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-void PPCRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- I->getOpcode() == PPC::ADJCALLSTACKUP) {
- // Add (actually subtract) back the amount the callee popped on return.
- if (int CalleeAmt = I->getOperand(1).getImm()) {
- bool is64Bit = Subtarget.isPPC64();
- CalleeAmt *= -1;
- unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
- unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
-
- if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addImm(CalleeAmt);
- } else {
- MachineBasicBlock::iterator MBBI = I;
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CalleeAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
- .addReg(StackReg, RegState::Kill)
- .addReg(TmpReg);
- }
- }
- }
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
/// register first and then a spilled callee-saved register if that fails.
static
@@ -489,19 +445,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
- // is arbitrary and will be subsequently ignored. For 32-bit, we must
- // create exactly one stack slot and return its FrameIdx for all
- // nonvolatiles.
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64()) {
+ if (Subtarget.isPPC64())
FrameIdx = 0;
- } else if (CRSpillFrameIdx) {
- FrameIdx = CRSpillFrameIdx;
- } else {
- MachineFrameInfo *MFI =
- (const_cast<MachineFunction &>(MF)).getFrameInfo();
- FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
- CRSpillFrameIdx = FrameIdx;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
}
return true;
}
@@ -510,7 +461,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -524,20 +476,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Find out which operand is the frame index.
- unsigned FIOperandNo = 0;
- while (!MI.getOperand(FIOperandNo).isFI()) {
- ++FIOperandNo;
- assert(FIOperandNo != MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
// Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNo-1;
+ OffsetOperandNo = FIOperandNum-1;
// Get the frame index.
- int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Get the frame pointer save index. Users of this index are primarily
// DYNALLOC instructions.
@@ -567,7 +512,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
(is64Bit ? PPC::X31 : PPC::R31) :
(is64Bit ? PPC::X1 : PPC::R1),
false);
@@ -649,7 +594,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OperandBase = OffsetOperandNo;
}
- unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index a8fd796..9840666 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -30,7 +30,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
std::map<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
const TargetInstrInfo &TII;
- mutable int CRSpillFrameIdx;
public:
PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
@@ -56,10 +55,6 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void lowerDynamicAlloc(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
@@ -69,7 +64,8 @@ public:
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 5ca3876..8ee9b1e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -63,142 +63,28 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
field bits<5> Num = num;
}
-
// General-purpose registers
-def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>;
-def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>;
-def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>;
-def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>;
-def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>;
-def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>;
-def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>;
-def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>;
-def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>;
-def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>;
-def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>;
-def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>;
-def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>;
-def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>;
-def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>;
-def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>;
-def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>;
-def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>;
-def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>;
-def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>;
-def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>;
-def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>;
-def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>;
-def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>;
-def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>;
-def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>;
-def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>;
-def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>;
-def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>;
-def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>;
-def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>;
-def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>;
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
// 64-bit General-purpose registers
-def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>;
-def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>;
-def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>;
-def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>;
-def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>;
-def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>;
-def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>;
-def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>;
-def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>;
-def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>;
-def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>;
-def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>;
-def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>;
-def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>;
-def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>;
-def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>;
-def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>;
-def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>;
-def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>;
-def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>;
-def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>;
-def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>;
-def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>;
-def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>;
-def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>;
-def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>;
-def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>;
-def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>;
-def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>;
-def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>;
-def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>;
-def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>;
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
// Floating-point registers
-def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>;
-def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>;
-def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>;
-def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>;
-def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>;
-def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>;
-def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>;
-def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>;
-def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>;
-def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>;
-def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>;
-def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>;
-def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>;
-def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>;
-def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>;
-def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>;
-def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>;
-def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>;
-def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>;
-def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>;
-def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>;
-def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>;
-def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>;
-def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>;
-def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>;
-def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>;
-def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>;
-def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>;
-def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>;
-def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>;
-def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>;
-def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>;
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
// Vector registers
-def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>;
-def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>;
-def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>;
-def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>;
-def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>;
-def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>;
-def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>;
-def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>;
-def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>;
-def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>;
-def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>;
-def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>;
-def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>;
-def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>;
-def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>;
-def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>;
-def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>;
-def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>;
-def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>;
-def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>;
-def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>;
-def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>;
-def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>;
-def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>;
-def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>;
-def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>;
-def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>;
-def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>;
-def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>;
-def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>;
-def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>;
-def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>;
+foreach Index = 0-31 in {
+ def V#Index : VR<Index, "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index d9b4e30..18e4c07 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -36,6 +36,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, Use64BitRegs(false)
, IsPPC64(is64Bit)
, HasAltivec(false)
+ , HasQPX(false)
, HasFSQRT(false)
, HasSTFIWX(false)
, HasISEL(false)
@@ -82,6 +83,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// Set up darwin-specific properties.
if (isDarwin())
HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
}
/// SetJITMode - This is called to inform the subtarget info that we are
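
As a rough illustration of the vendor check that drives the new 32-byte stack alignment (the triple string and the 16-byte fallback are assumptions for the sketch, not values taken from this patch):

    #include "llvm/ADT/Triple.h"
    using llvm::Triple;

    int main() {
      Triple T("powerpc64-bgq-linux");             // hypothetical BG/Q triple
      bool IsBGQ = T.getVendor() == Triple::BGQ;   // same test as PPCSubtarget::isBGQ()
      unsigned StackAlignment = IsBGQ ? 32 : 16;   // external BG/Q code assumes 32 bytes
      (void)StackAlignment;
      return 0;
    }
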
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 416c0f3..15885bd 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
DIR_PWR6,
+ DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,6 +75,7 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+ bool HasQPX;
bool HasFSQRT;
bool HasSTFIWX;
bool HasISEL;
@@ -150,6 +156,7 @@ public:
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
@@ -160,6 +167,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b8b7882..fe851c1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -126,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
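
A hedged usage sketch of the new addAnalysisPasses hook with the legacy pass manager of this period (the driver boilerplate around it is assumed, not part of the patch):

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    // TM is a constructed PPCTargetMachine*, M the module being optimized.
    void runPasses(TargetMachine *TM, Module *M) {
      PassManager PM;
      TM->addAnalysisPasses(PM);           // registers BasicTTI, then the PPC TTI pass
      PM.add(createLoopVectorizePass());   // any pass that consumes TargetTransformInfo
      PM.run(*M);
    }
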
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d917d99..606ccb3 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -68,6 +68,9 @@ public:
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 0000000..5e9ad34
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,236 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+ const PPCTargetMachine *TM;
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ // FIXME: PPC currently does not have custom popcnt lowering even though
+ // there is hardware support. Once this is fixed, update this function
+ // to reflect the real capabilities of the hardware.
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 12;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ // Each load/store unit costs 1.
+ unsigned Cost = LT.first * 1;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes)
+ Cost *= (SrcBytes/Alignment);
+
+ return Cost;
+}
+
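
To make the two cost rules above concrete, a standalone sketch that mirrors the arithmetic (the legalization result and the base costs are assumptions, not values computed by LLVM):

    #include <cassert>

    int main() {
      // v4i32 store with only 4-byte alignment: one legal part, 16-byte store size.
      unsigned LTFirst = 1, SrcBytes = 16, Alignment = 4;
      unsigned MemCost = LTFirst * 1;
      if (SrcBytes && Alignment && Alignment < SrcBytes)
        MemCost *= SrcBytes / Alignment;             // decomposed into 4 accesses
      assert(MemCost == 4);

      // extractelement from a vector: assumed base cost of 1 plus the LHS penalty.
      unsigned LHSPenalty = 12, BaseCost = 1;
      unsigned ExtractCost = LHSPenalty + BaseCost;  // 13, reflecting the store/reload stall
      assert(ExtractCost == 13);
      return 0;
    }
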
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 0f5125d..ba87918 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -23,17 +23,19 @@ class AMDGPUTargetMachine;
// R600 Passes
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
+FunctionPass *createSIInsertWaits(TargetMachine &tm);
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+FunctionPass* createAMDGPUIndirectAddressingPass(TargetMachine &tm);
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 754506c..c30dbe4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -47,6 +47,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
#endif
}
SetupMachineFunction(MF);
+ if (OutStreamer.hasRawTextSupport()) {
+ OutStreamer.EmitRawText("@" + MF.getName() + ":");
+ }
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
EmitProgramInfo(MF);
@@ -88,8 +91,6 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
switch (reg) {
default: break;
case AMDGPU::EXEC:
- case AMDGPU::SI_LITERAL_CONSTANT:
- case AMDGPU::SREG_LIT_0:
case AMDGPU::M0:
continue;
}
@@ -115,10 +116,16 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
isSGPR = true;
width = 8;
+ } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 8;
+ } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 16;
} else {
assert(!"Unknown register class");
}
- hwReg = RI->getEncodingValue(reg);
+ hwReg = RI->getEncodingValue(reg) & 0xff;
maxUsed = hwReg + width - 1;
if (isSGPR) {
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
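
The new & 0xff presumably keeps only the low byte of getEncodingValue(), on the assumption that higher bits of this target's encoding tables carry flags rather than the hardware register number; numerically:

    unsigned Encoding = 0x0305;          // made-up value with flag bits above the low byte
    unsigned hwReg    = Encoding & 0xff; // 0x05 -- without the mask MaxSGPR/MaxVGPR would inflate
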
diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h
deleted file mode 100644
index 84f3588..0000000
--- a/lib/Target/R600/AMDGPUCodeEmitter.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUCODEEMITTER_H
-#define AMDGPUCODEEMITTER_H
-
-namespace llvm {
-
-class AMDGPUCodeEmitter {
-public:
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- virtual uint64_t getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const { return 0; }
- virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint64_t VOPPostEncode(const MachineInstr &MI,
- uint64_t Value) const {
- return Value;
- }
- virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
- const {
- return 0;
- }
-};
-
-} // End namespace llvm
-
-#endif // AMDGPUCODEEMITTER_H
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
new file mode 100644
index 0000000..815d6f7
--- /dev/null
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -0,0 +1,122 @@
+//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPURegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl) { }
+
+AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+
+unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
+
+ // XXX: Hardcoding to 1 for now.
+ //
+  // I think the StackWidth should be stored as metadata associated with the
+ // MachineFunction. This metadata can either be added by a frontend, or
+  // calculated by an R600-specific LLVM IR pass.
+ //
+ // The StackWidth determines how stack objects are laid out in memory.
+ // For a vector stack variable, like: int4 stack[2], the data will be stored
+ // in the following ways depending on the StackWidth.
+ //
+ // StackWidth = 1:
+ //
+ // T0.X = stack[0].x
+ // T1.X = stack[0].y
+ // T2.X = stack[0].z
+ // T3.X = stack[0].w
+ // T4.X = stack[1].x
+ // T5.X = stack[1].y
+ // T6.X = stack[1].z
+ // T7.X = stack[1].w
+ //
+ // StackWidth = 2:
+ //
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T1.X = stack[0].z
+ // T1.Y = stack[0].w
+ // T2.X = stack[1].x
+ // T2.Y = stack[1].y
+ // T3.X = stack[1].z
+ // T3.Y = stack[1].w
+ //
+ // StackWidth = 4:
+ // T0.X = stack[0].x
+ // T0.Y = stack[0].y
+ // T0.Z = stack[0].z
+ // T0.W = stack[0].w
+ // T1.X = stack[1].x
+ // T1.Y = stack[1].y
+ // T1.Z = stack[1].z
+ // T1.W = stack[1].w
+ return 1;
+}
+
+/// \returns The offset, in registers, of the object at frame index \p FI.
+int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Offset = 0;
+ int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+
+ for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
+ const AllocaInst *Alloca = MFI->getObjectAllocation(i);
+ unsigned ArrayElements;
+ const Type *AllocaType = Alloca->getAllocatedType();
+ const Type *ElementType;
+
+ if (AllocaType->isArrayTy()) {
+ ArrayElements = AllocaType->getArrayNumElements();
+ ElementType = AllocaType->getArrayElementType();
+ } else {
+ ArrayElements = 1;
+ ElementType = AllocaType;
+ }
+
+ unsigned VectorElements;
+ if (ElementType->isVectorTy()) {
+ VectorElements = ElementType->getVectorNumElements();
+ } else {
+ VectorElements = 1;
+ }
+
+ Offset += (VectorElements / getStackWidth(MF)) * ArrayElements;
+ }
+ return Offset;
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+void
+AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
+
+bool
+AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
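
A worked instance of getFrameIndexOffset() above, for a hypothetical kernel with two allocas and the hard-coded StackWidth of 1 (types and values assumed purely for illustration):

    int main() {
      // Hypothetical kernel locals:  int4 a[2];  float b;
      unsigned StackWidth = 1;                      // getStackWidth() is hard-coded to 1
      unsigned RegsForA   = (4 / StackWidth) * 2;   // VectorElements/width * ArrayElements = 8
      unsigned OffsetOfB  = RegsForA;               // offset of b's frame index, in registers
      unsigned TotalRegs  = OffsetOfB + (1 / StackWidth) * 1;  // FI == -1 sums all objects: 9
      (void)TotalRegs;
      return 0;
    }
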
diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
index 51337c3..cf5742e 100644
--- a/lib/Target/R600/AMDILFrameLowering.h
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -1,4 +1,4 @@
-//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -30,6 +30,10 @@ public:
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
unsigned TransAl = 1);
virtual ~AMDGPUFrameLowering();
+
+ /// \returns The number of 32-bit sub-registers that are used when storing
+ /// values to the stack.
+ virtual unsigned getStackWidth(const MachineFunction &MF) const;
virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
virtual void emitPrologue(MachineFunction &MF) const;
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 473dac4..0a33264 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDIL_fraction:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_mad:
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2), Op.getOperand(3));
case AMDGPUIntrinsic::AMDIL_max:
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
- return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
- Op.getOperand(2),
- OneSubAC);
+ return DAG.getNode(ISD::FADD, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+ OneSubAC);
}
/// \brief Generate Min/Max node
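
With the MAD node gone, LRP is now expanded with generic FMUL/FADD; a scalar reference of the same computation, for clarity only:

    // lrp(a, b, c) = a*b + (1 - a)*c, matching the FMUL/FADD expansion above.
    float lrp(float a, float b, float c) { return a * b + (1.0f - a) * c; }
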
@@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
// AMDIL DAG nodes
- NODE_NAME_CASE(MAD);
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(DIV_INF);
@@ -410,8 +406,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(INTERP)
- NODE_NAME_CASE(INTERP_P0)
NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(CONST_ADDRESS)
+ NODE_NAME_CASE(REGISTER_LOAD)
+ NODE_NAME_CASE(REGISTER_STORE)
}
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index c7abaf6..9e7d997 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -53,6 +53,11 @@ public:
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc DL, SelectionDAG &DAG) const;
+ virtual SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ CLI.Callee.dump();
+ llvm_unreachable("Undefined function");
+ }
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
@@ -60,6 +65,10 @@ public:
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
virtual const char* getTargetNodeName(unsigned Opcode) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const {
+ return N;
+ }
+
// Functions defined in AMDILISelLowering.cpp
public:
@@ -103,7 +112,6 @@ namespace AMDGPUISD {
enum {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- MAD, // 32bit Fused Multiply Add instruction
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
DIV_INF, // Divide with infinity returned on zero divisor
@@ -120,25 +128,16 @@ enum {
SMIN,
UMIN,
URECIP,
- INTERP,
- INTERP_P0,
EXPORT,
+ CONST_ADDRESS,
+ REGISTER_LOAD,
+ REGISTER_STORE,
LAST_AMDGPU_ISD_NUMBER
};
} // End namespace AMDGPUISD
-namespace SIISD {
-
-enum {
- SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
- VCC_AND,
- VCC_BITCAST
-};
-
-} // End namespace SIISD
-
} // End namespace llvm
#endif // AMDGPUISELLOWERING_H
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
new file mode 100644
index 0000000..15840b3
--- /dev/null
+++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
@@ -0,0 +1,344 @@
+//===-- AMDGPUIndirectAddressing.cpp - Indirect Addressing Support --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Instructions can use indirect addressing to index the register file as if it
+/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
+/// to either a COPY or a MOV that uses indirect addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const AMDGPUInstrInfo *TII;
+
+ bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
+
+public:
+ AMDGPUIndirectAddressingPass(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const AMDGPUInstrInfo*>(tm.getInstrInfo()))
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Handle indirect addressing"; }
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUIndirectAddressingPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
+ return new AMDGPUIndirectAddressingPass(tm);
+}
+
+bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ int IndirectBegin = TII->getIndirectIndexBegin(MF);
+ int IndirectEnd = TII->getIndirectIndexEnd(MF);
+
+ if (IndirectBegin == -1) {
+ // No indirect addressing, we can skip this pass
+ assert(IndirectEnd == -1);
+ return false;
+ }
+
+ // The map keeps track of the indirect address that is represented by
+ // each virtual register. The key is the register and the value is the
+ // indirect address it uses.
+ std::map<unsigned, unsigned> RegisterAddressMap;
+
+ // First pass - Lower all of the RegisterStore instructions and track which
+ // registers are live.
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // This map keeps track of the current live indirect registers.
+ // The key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterStore(MI)) {
+ continue;
+ }
+
+ // Lower RegisterStore
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+ const TargetRegisterClass *IndirectStoreRegClass =
+ TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access.
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
+ .addOperand(MI.getOperand(0));
+
+ RegisterAddressMap[DstReg] = Address;
+ LiveAddressRegisterMap[Address] = DstReg;
+ } else {
+ // Indirect register access.
+ MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
+ MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
+ RegisterAddressMap[DstReg] = Addr;
+ LiveAddressRegisterMap[Addr] = DstReg;
+ }
+ }
+ MI.eraseFromParent();
+ }
+
+    // Update the live-ins of the successor blocks
+ for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
+ SuccEnd = MBB.succ_end();
+ SuccEnd != Succ; ++Succ) {
+ std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
+ for (Key = LiveAddressRegisterMap.begin(),
+ KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
+ (*Succ)->addLiveIn(Key->second);
+ }
+ }
+ }
+
+ // Second pass - Lower the RegisterLoad instructions
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ // Key is the address and the value is the register
+ std::map<unsigned, unsigned> LiveAddressRegisterMap;
+ MachineBasicBlock &MBB = *BB;
+
+ MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
+ while (LI != MBB.livein_end()) {
+ std::vector<unsigned> PhiRegisters;
+
+ // Make sure this live in is used for indirect addressing
+ if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
+ ++LI;
+ continue;
+ }
+
+ unsigned Address = RegisterAddressMap[*LI];
+ LiveAddressRegisterMap[Address] = *LI;
+ PhiRegisters.push_back(*LI);
+
+ // Check if there are other live in registers which map to the same
+ // indirect address.
+ for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
+ LE = MBB.livein_end();
+ LJ != LE; ++LJ) {
+ unsigned Reg = *LJ;
+ if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
+ continue;
+ }
+
+ if (RegisterAddressMap[Reg] == Address) {
+ PhiRegisters.push_back(Reg);
+ }
+ }
+
+ if (PhiRegisters.size() == 1) {
+ // We don't need to insert a Phi instruction, so we can just add the
+ // registers to the live list for the block.
+ LiveAddressRegisterMap[Address] = *LI;
+ MBB.removeLiveIn(*LI);
+ } else {
+ // We need to insert a PHI, because we have the same address being
+ // written in multiple predecessor blocks.
+ const TargetRegisterClass *PhiDstClass =
+ TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
+ unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
+ MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
+ MBB.findDebugLoc(MBB.begin()),
+ TII->get(AMDGPU::PHI), PhiDstReg);
+
+ for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
+ RE = PhiRegisters.end();
+ RI != RE; ++RI) {
+ unsigned Reg = *RI;
+ MachineInstr *DefInst = MRI.getVRegDef(Reg);
+ assert(DefInst);
+ MachineBasicBlock *RegBlock = DefInst->getParent();
+ Phi.addReg(Reg);
+ Phi.addMBB(RegBlock);
+ MBB.removeLiveIn(Reg);
+ }
+ RegisterAddressMap[PhiDstReg] = Address;
+ LiveAddressRegisterMap[Address] = PhiDstReg;
+ }
+ LI = MBB.livein_begin();
+ }
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
+ I != MBB.end(); I = Next) {
+ Next = llvm::next(I);
+ MachineInstr &MI = *I;
+
+ if (!TII->isRegisterLoad(MI)) {
+ if (MI.getOpcode() == AMDGPU::PHI) {
+ continue;
+ }
+ // Check for indirect register defs
+ for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
+ OpIdx < NumOperands; ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isReg() && MO.isDef() &&
+ RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
+ unsigned Reg = MO.getReg();
+ unsigned LiveAddress = RegisterAddressMap[Reg];
+ // Chain the live-ins
+ if (LiveAddressRegisterMap.find(LiveAddress) !=
+              LiveAddressRegisterMap.end()) {
+ MI.addOperand(MachineOperand::CreateReg(
+ LiveAddressRegisterMap[LiveAddress],
+ false, // isDef
+ true, // isImp
+ true)); // isKill
+ }
+ LiveAddressRegisterMap[LiveAddress] = Reg;
+ }
+ }
+ continue;
+ }
+
+ const TargetRegisterClass *SuperIndirectRegClass =
+ TII->getSuperIndirectRegClass();
+ const TargetRegisterClass *IndirectLoadRegClass =
+ TII->getIndirectAddrLoadRegClass();
+ unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
+
+ unsigned RegIndex = MI.getOperand(2).getImm();
+ unsigned Channel = MI.getOperand(3).getImm();
+ unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
+
+ if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
+ // Direct register access
+ unsigned Reg = LiveAddressRegisterMap[Address];
+ unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
+
+ if (regHasExplicitDef(MRI, Reg)) {
+ // If the register we are reading from has an explicit def, then that
+ // means it was written via a direct register access (i.e. COPY
+ // or other instruction that doesn't use indirect addressing). In
+ // this case we know where the value has been stored, so we can just
+ // issue a copy.
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(Reg);
+ } else {
+ // If the register we are reading has an implicit def, then that
+        // means it was written by an indirect register access (i.e. an
+        // instruction that uses indirect addressing).
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(AddrReg)
+ .addReg(Reg, RegState::Implicit);
+ }
+ } else {
+ // Indirect register access
+
+      // Note on REG_SEQUENCE instructions: you can't actually use the register
+ // it defines unless you have an instruction that takes the defined
+ // register class as an operand.
+
+ MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
+ TII->get(AMDGPU::REG_SEQUENCE),
+ IndirectReg);
+ for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
+ unsigned Addr = TII->calculateIndirectAddress(i, Channel);
+ if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
+ continue;
+ }
+ unsigned Reg = LiveAddressRegisterMap[Addr];
+
+ // We only need to use REG_SEQUENCE for explicit defs, since the
+ // register coalescer won't do anything with the implicit defs.
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+ if (!regHasExplicitDef(MRI, Reg)) {
+ continue;
+ }
+
+        // Insert a REG_SEQUENCE instruction to force the register allocator
+ // to allocate the virtual register to the correct physical register.
+ Sequence.addReg(LiveAddressRegisterMap[Addr]);
+ Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
+ }
+ MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
+ MI.getOperand(0).getReg(), // Value
+ Address,
+ MI.getOperand(1).getReg()); // Offset
+
+
+
+ Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
+ Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
+
+ }
+ MI.eraseFromParent();
+ }
+ }
+ return false;
+}
+
+bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ MachineInstr *DefInstr = MRI.getVRegDef(Reg);
+
+ if (!DefInstr) {
+ return false;
+ }
+
+ if (DefInstr->getOpcode() == AMDGPU::PHI) {
+ bool Explicit = false;
+ for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
+ E = DefInstr->operands_end();
+ I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef()) {
+ continue;
+ }
+
+ Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
+ }
+ return Explicit;
+ }
+
+ return DefInstr->getOperand(0).isReg() &&
+ DefInstr->getOperand(0).getReg() == Reg;
+}
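
A tiny standalone model of the bookkeeping the pass performs (the register number and the address value are invented; the real values come from MRI and calculateIndirectAddress()):

    #include <map>

    int main() {
      std::map<unsigned, unsigned> RegisterAddressMap;     // vreg    -> indirect address
      std::map<unsigned, unsigned> LiveAddressRegisterMap; // address -> vreg currently live

      unsigned Address = 42;   // stand-in for calculateIndirectAddress(RegIndex, Channel)
      unsigned DstReg  = 1001; // stand-in for MRI.createVirtualRegister(...)

      RegisterAddressMap[DstReg]      = Address;  // which address this vreg models
      LiveAddressRegisterMap[Address] = DstReg;   // which vreg holds that address right now
      return 0;
    }
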
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index e42a46d..30f736c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
using namespace llvm;
@@ -234,7 +235,16 @@ AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
// TODO: Implement this function
return true;
}
-
+
+bool AMDGPUInstrInfo::isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
+}
+
+bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
+}
+
+
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
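
These queries work because the TableGen bits land in the top two TSFlags bits, matching the AMDGPU_FLAG_* macros added to AMDGPUInstrInfo.h in the next hunk; a self-contained check of that encoding:

    #include <cassert>
    #include <cstdint>

    #define AMDGPU_FLAG_REGISTER_LOAD  (UINT64_C(1) << 63)
    #define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)

    int main() {
      uint64_t TSFlags = UINT64_C(1) << 62;   // what TableGen emits for isRegisterStore = 1
      assert(TSFlags & AMDGPU_FLAG_REGISTER_STORE);
      assert(!(TSFlags & AMDGPU_FLAG_REGISTER_LOAD));
      return 0;
    }
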
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index cb97af9..3909e4e 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -40,9 +40,10 @@ class MachineInstrBuilder;
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
private:
const AMDGPURegisterInfo RI;
- TargetMachine &TM;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
+protected:
+ TargetMachine &TM;
public:
explicit AMDGPUInstrInfo(TargetMachine &tm);
@@ -130,12 +131,66 @@ public:
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
bool isStoreInst(llvm::MachineInstr *MI) const;
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+ bool isRegisterStore(const MachineInstr &MI) const;
+ bool isRegisterLoad(const MachineInstr &MI) const;
+
+//===---------------------------------------------------------------------===//
+// Pure virtual functions to be implemented by subclasses.
+//===---------------------------------------------------------------------===//
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const = 0;
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
+ /// \returns the smallest register index that will be accessed by an indirect
+  /// read or write, or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
+
+ /// \returns the largest register index that will be accessed by an indirect
+  /// read or write, or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
+
+ /// \brief Calculate the "Indirect Address" for the given \p RegIndex and
+ /// \p Channel
+ ///
+ /// We model indirect addressing using a virtual address space that can be
+  /// accessed with loads and stores. The "Indirect Address" is the memory
+ /// address in this virtual address space that maps to the given \p RegIndex
+ /// and \p Channel.
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const = 0;
+
+ /// \returns The register class to be used for storing values to an
+  /// "Indirect Address".
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const = 0;
+
+ /// \returns The register class to be used for loading values from
+  /// an "Indirect Address".
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+
+ /// \brief Build instruction(s) for an indirect register write.
+ ///
+ /// \returns The instruction that performs the indirect register write
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \brief Build instruction(s) for an indirect register read.
+ ///
+ /// \returns The instruction that performs the indirect register read
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const = 0;
+
+ /// \returns the register class whose sub registers are the set of all
+ /// possible registers that can be used for indirect addressing.
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
+
+
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
@@ -145,4 +200,7 @@ public:
} // End llvm namespace
+#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
+#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
+
#endif // AMDGPUINSTRINFO_H
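
The virtual-address model described above leaves the concrete mapping to each subtarget; a purely hypothetical implementation sketch (the real R600 mapping lives elsewhere and is not part of this hunk):

    // Assumption for illustration: four channels (x, y, z, w) per register index.
    unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) {
      return RegIndex * 4 + Channel;
    }
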
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 96368e8..b66ae87 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -72,3 +72,11 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+
+def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index e634d20..960f108 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
- field bits<16> AMDILOp = 0;
- field bits<3> Gen = 0;
+ field bit isRegisterLoad = 0;
+ field bit isRegisterStore = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
@@ -22,8 +22,9 @@ class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instructio
let AsmString = asm;
let Pattern = pattern;
let Itinerary = NullALU;
- let TSFlags{42-40} = Gen;
- let TSFlags{63-48} = AMDILOp;
+
+ let TSFlags{63} = isRegisterLoad;
+ let TSFlags{62} = isRegisterStore;
}
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
@@ -76,6 +77,11 @@ def COND_LE : PatLeaf <
case ISD::SETLE: return true;}}}]
>;
+def COND_NULL : PatLeaf <
+ (cond),
+ [{return false;}]
+>;
+
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
@@ -101,7 +107,9 @@ def FP_ONE : PatLeaf <
[{return N->isExactlyValue(1.0);}]
>;
-let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
+let isCodeGenOnly = 1, isPseudo = 1 in {
+
+let usesCustomInserter = 1 in {
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
@@ -131,7 +139,31 @@ def SHADER_TYPE : AMDGPUShaderInst <
[(int_AMDGPU_shader_type imm:$type)]
>;
-} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
+} // usesCustomInserter = 1
+
+multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
+ ComplexPattern addrPat> {
+ def RegisterLoad : AMDGPUShaderInst <
+ (outs dstClass:$dst),
+ (ins addrClass:$addr, i32imm:$chan),
+ "RegisterLoad $dst, $addr",
+ [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
+ (i32 timm:$chan)))]
+ > {
+ let isRegisterLoad = 1;
+ }
+
+ def RegisterStore : AMDGPUShaderInst <
+ (outs),
+ (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
+ "RegisterStore $val, $addr",
+ [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+ > {
+ let isRegisterStore = 1;
+ }
+}
+
+} // End isCodeGenOnly = 1, isPseudo = 1
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
@@ -164,13 +196,64 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
>;
// Vector Build pattern
+class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$src))),
+ (vecType elemClass:$src)
+>;
+
+class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+ (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+>;
+
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
- elemClass:$z, sel_z), elemClass:$w, sel_w)
+ (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
+ elemClass:$z, sub2), elemClass:$w, sub3)
+>;
+
+class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+>;
+
+class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
+ (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
+ (elemType elemClass:$sub2), (elemType elemClass:$sub3),
+ (elemType elemClass:$sub4), (elemType elemClass:$sub5),
+ (elemType elemClass:$sub6), (elemType elemClass:$sub7),
+ (elemType elemClass:$sub8), (elemType elemClass:$sub9),
+ (elemType elemClass:$sub10), (elemType elemClass:$sub11),
+ (elemType elemClass:$sub12), (elemType elemClass:$sub13),
+ (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
+ elemClass:$sub2, sub2), elemClass:$sub3, sub3),
+ elemClass:$sub4, sub4), elemClass:$sub5, sub5),
+ elemClass:$sub6, sub6), elemClass:$sub7, sub7),
+ elemClass:$sub8, sub8), elemClass:$sub9, sub9),
+ elemClass:$sub10, sub10), elemClass:$sub11, sub11),
+ elemClass:$sub12, sub12), elemClass:$sub13, sub13),
+ elemClass:$sub14, sub14), elemClass:$sub15, sub15)
>;
// bitconvert pattern
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index eeafec8..fe994d2 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -38,6 +38,7 @@ const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF
void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj,
+ unsigned FIOperandNum,
RegScavenger *RS) const {
assert(!"Subroutines not supported yet");
}
@@ -47,5 +48,28 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return 0;
}
+unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
+
+ switch(IndirectIndex) {
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
+ case 4: return AMDGPU::sub4;
+ case 5: return AMDGPU::sub5;
+ case 6: return AMDGPU::sub6;
+ case 7: return AMDGPU::sub7;
+ case 8: return AMDGPU::sub8;
+ case 9: return AMDGPU::sub9;
+ case 10: return AMDGPU::sub10;
+ case 11: return AMDGPU::sub11;
+ case 12: return AMDGPU::sub12;
+ case 13: return AMDGPU::sub13;
+ case 14: return AMDGPU::sub14;
+ case 15: return AMDGPU::sub15;
+ default: llvm_unreachable("indirect index out of range");
+ }
+}
+
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 76ee7ae..1fc88e7 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -53,9 +53,12 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
RegScavenger *RS) const;
unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getIndirectSubReg(unsigned IndirectIndex) const;
+
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td
index 8181e02..b5aca03 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.td
+++ b/lib/Target/R600/AMDGPURegisterInfo.td
@@ -12,10 +12,13 @@
//===----------------------------------------------------------------------===//
let Namespace = "AMDGPU" in {
- def sel_x : SubRegIndex;
- def sel_y : SubRegIndex;
- def sel_z : SubRegIndex;
- def sel_w : SubRegIndex;
+
+foreach Index = 0-15 in {
+ def sub#Index : SubRegIndex;
+}
+
+def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;
+
}
include "R600RegisterInfo.td"
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
index 8295efd..26f842e 100644
--- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -22,30 +22,101 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace {
// Definition of the complex types used in this pass.
typedef std::pair<BasicBlock *, Value *> BBValuePair;
-typedef ArrayRef<BasicBlock*> BBVecRef;
typedef SmallVector<RegionNode*, 8> RNVector;
typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BranchInst*, 8> BranchVector;
typedef SmallVector<BBValuePair, 2> BBValueVector;
+typedef SmallPtrSet<BasicBlock *, 8> BBSet;
+
typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
-typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
+typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
+typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
// The name for newly created blocks.
static const char *FlowBlockName = "Flow";
+/// @brief Find the nearest common dominator for multiple BasicBlocks
+///
+/// Helper class for AMDGPUStructurizeCFG
+/// TODO: Maybe move into common code
+class NearestCommonDominator {
+
+ DominatorTree *DT;
+
+ DTN2UnsignedMap IndexMap;
+
+ BasicBlock *Result;
+ unsigned ResultIndex;
+ bool ExplicitMentioned;
+
+public:
+ /// \brief Start a new query
+ NearestCommonDominator(DominatorTree *DomTree) {
+ DT = DomTree;
+ Result = 0;
+ }
+
+ /// \brief Add BB to the resulting dominator
+  /// \brief Add BB to the set of blocks whose nearest common dominator is computed
+
+ DomTreeNode *Node = DT->getNode(BB);
+
+ if (Result == 0) {
+ unsigned Numbering = 0;
+ for (;Node;Node = Node->getIDom())
+ IndexMap[Node] = ++Numbering;
+ Result = BB;
+ ResultIndex = 1;
+ ExplicitMentioned = Remember;
+ return;
+ }
+
+ for (;Node;Node = Node->getIDom())
+ if (IndexMap.count(Node))
+ break;
+ else
+ IndexMap[Node] = 0;
+
+ assert(Node && "Dominator tree invalid!");
+
+ unsigned Numbering = IndexMap[Node];
+ if (Numbering > ResultIndex) {
+ Result = Node->getBlock();
+ ResultIndex = Numbering;
+ ExplicitMentioned = Remember && (Result == BB);
+ } else if (Numbering == ResultIndex) {
+ ExplicitMentioned |= Remember;
+ }
+ }
+
+ /// \brief Is "Result" one of the BBs added with "Remember" = True?
+ bool wasResultExplicitMentioned() {
+ return ExplicitMentioned;
+ }
+
+ /// \brief Get the query result
+ BasicBlock *getResult() {
+ return Result;
+ }
+};
+
/// @brief Transforms the control flow graph on one single entry/exit region
/// at a time.
///
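
A usage sketch for the NearestCommonDominator helper added above (DT and BB come from whichever pass is querying; nothing here beyond the class itself is taken from the patch):

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    // Nearest common dominator of all predecessors of BB.
    BasicBlock *commonDomOfPreds(DominatorTree *DT, BasicBlock *BB) {
      NearestCommonDominator NCD(DT);
      for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
        NCD.addBlock(*PI);                       // Remember defaults to true
      bool WasAPred = NCD.wasResultExplicitMentioned();
      (void)WasAPred;
      return NCD.getResult();
    }
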
@@ -106,45 +177,62 @@ class AMDGPUStructurizeCFG : public RegionPass {
DominatorTree *DT;
RNVector Order;
- VisitedMap Visited;
- PredMap Predicates;
+ BBSet Visited;
+
BBPhiMap DeletedPhis;
- BBVector FlowsInserted;
+ BB2BBVecMap AddedPhis;
+
+ PredMap Predicates;
+ BranchVector Conditions;
- BasicBlock *LoopStart;
- BasicBlock *LoopEnd;
- BBPredicates LoopPred;
+ BB2BBMap Loops;
+ PredMap LoopPreds;
+ BranchVector LoopConds;
+
+ RegionNode *PrevNode;
void orderNodes();
- void buildPredicate(BranchInst *Term, unsigned Idx,
- BBPredicates &Pred, bool Invert);
+ void analyzeLoops(RegionNode *N);
+
+ Value *invert(Value *Condition);
- void analyzeBlock(BasicBlock *BB);
+ Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
- void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
+ void gatherPredicates(RegionNode *N);
void collectInfos();
- bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
+ void insertConditions(bool Loops);
+
+ void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+ void setPhiValues();
void killTerminator(BasicBlock *BB);
- RegionNode *skipChained(RegionNode *Node);
+ void changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator);
- void delPhiValues(BasicBlock *From, BasicBlock *To);
+ BasicBlock *getNextFlow(BasicBlock *Dominator);
- void addPhiValues(BasicBlock *From, BasicBlock *To);
+ BasicBlock *needPrefix(bool NeedEmpty);
- BasicBlock *getNextFlow(BasicBlock *Prev);
+ BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
- bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
+ void setPrevNode(BasicBlock *BB);
- BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
+ bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
- void createFlow();
+ bool isPredictableTrue(RegionNode *Node);
+
+ void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
- void insertConditions();
+ void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
+
+ void createFlow();
void rebuildSSA();
@@ -198,212 +286,214 @@ void AMDGPUStructurizeCFG::orderNodes() {
}
}
-/// \brief Build blocks and loop predicates
-void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
- BBPredicates &Pred, bool Invert) {
- Value *True = Invert ? BoolFalse : BoolTrue;
- Value *False = Invert ? BoolTrue : BoolFalse;
+/// \brief Determine the end of the loops
+void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
- RegionInfo *RI = ParentRegion->getRegionInfo();
- BasicBlock *BB = Term->getParent();
+ if (N->isSubRegion()) {
+ // Test for exit as back edge
+ BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
+ if (Visited.count(Exit))
+ Loops[Exit] = N->getEntry();
+
+ } else {
+ // Test for successors as back edge
+ BasicBlock *BB = N->getNodeAs<BasicBlock>();
+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
- // Handle the case where multiple regions start at the same block
- Region *R = BB != ParentRegion->getEntry() ?
- RI->getRegionFor(BB) : ParentRegion;
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
+ }
+ }
+}
- if (R == ParentRegion) {
- // It's a top level block in our region
- Value *Cond = True;
- if (Term->isConditional()) {
- BasicBlock *Other = Term->getSuccessor(!Idx);
+/// \brief Invert the given condition
+Value *AMDGPUStructurizeCFG::invert(Value *Condition) {
- if (Visited.count(Other)) {
- if (!Pred.count(Other))
- Pred[Other] = False;
+ // First: Check if it's a constant
+ if (Condition == BoolTrue)
+ return BoolFalse;
- if (!Pred.count(BB))
- Pred[BB] = True;
- return;
- }
- Cond = Term->getCondition();
+ if (Condition == BoolFalse)
+ return BoolTrue;
- if (Idx != Invert)
- Cond = BinaryOperator::CreateNot(Cond, "", Term);
- }
+ if (Condition == BoolUndef)
+ return BoolUndef;
- Pred[BB] = Cond;
+ // Second: If the condition is already inverted, return the original value
+ if (match(Condition, m_Not(m_Value(Condition))))
+ return Condition;
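+ // (m_Value re-binds Condition to the operand of the NOT, which is
+ // exactly the value we want to return here.)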
- } else if (ParentRegion->contains(R)) {
- // It's a block in a sub region
- while(R->getParent() != ParentRegion)
- R = R->getParent();
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
- Pred[R->getEntry()] = True;
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
- } else {
- // It's a branch from outside into our parent region
- Pred[BB] = True;
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
}
-}
-/// \brief Analyze the successors of each block and build up predicates
-void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- BBPredicates &Pred = Predicates[BB];
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+}
- for (; PI != PE; ++PI) {
- BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+/// \brief Build the condition for one edge
+Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+ bool Invert) {
+ Value *Cond = Invert ? BoolFalse : BoolTrue;
+ if (Term->isConditional()) {
+ Cond = Term->getCondition();
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = Term->getSuccessor(i);
- if (Succ != BB)
- continue;
- buildPredicate(Term, i, Pred, false);
- }
+ if (Idx != Invert)
+ Cond = invert(Cond);
}
+ return Cond;
}
-/// \brief Analyze the conditions leading to loop to a previous block
-void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
- BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+/// \brief Analyze the predecessors of each block and build up predicates
+void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) {
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- BasicBlock *Succ = Term->getSuccessor(i);
+ RegionInfo *RI = ParentRegion->getRegionInfo();
+ BasicBlock *BB = N->getEntry();
+ BBPredicates &Pred = Predicates[BB];
+ BBPredicates &LPred = LoopPreds[BB];
+
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ PI != PE; ++PI) {
- // Ignore it if it's not a back edge
- if (!Visited.count(Succ))
+ // Ignore it if it's a branch from outside into our region entry
+ if (!ParentRegion->contains(*PI))
continue;
- buildPredicate(Term, i, LoopPred, true);
+ Region *R = RI->getRegionFor(*PI);
+ if (R == ParentRegion) {
- LoopEnd = BB;
- if (Visited[Succ] < LoopIdx) {
- LoopIdx = Visited[Succ];
- LoopStart = Succ;
+ // It's a top level block in our region
+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (Succ != BB)
+ continue;
+
+ if (Visited.count(*PI)) {
+ // Normal forward edge
+ if (Term->isConditional()) {
+ // Try to treat it like an ELSE block
+ BasicBlock *Other = Term->getSuccessor(!i);
+ if (Visited.count(Other) && !Loops.count(Other) &&
+ !Pred.count(Other) && !Pred.count(*PI)) {
+
+ Pred[Other] = BoolFalse;
+ Pred[*PI] = BoolTrue;
+ continue;
+ }
+ }
+ Pred[*PI] = buildCondition(Term, i, false);
+
+ } else {
+ // Back edge
+ LPred[*PI] = buildCondition(Term, i, true);
+ }
+ }
+
+ } else {
+
+ // It's an exit from a sub region
+ while(R->getParent() != ParentRegion)
+ R = R->getParent();
+
+ // Edge from inside a subregion to its entry, ignore it
+ if (R == N)
+ continue;
+
+ BasicBlock *Entry = R->getEntry();
+ if (Visited.count(Entry))
+ Pred[Entry] = BoolTrue;
+ else
+ LPred[Entry] = BoolFalse;
}
}
}
/// \brief Collect various loop and predicate infos
void AMDGPUStructurizeCFG::collectInfos() {
- unsigned Number = 0, LoopIdx = ~0;
// Reset predicate
Predicates.clear();
// and loop infos
- LoopStart = LoopEnd = 0;
- LoopPred.clear();
+ Loops.clear();
+ LoopPreds.clear();
+
+ // Reset the visited nodes
+ Visited.clear();
- RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
- for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
+ for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+ OI != OE; ++OI) {
// Analyze all the conditions leading to a node
- analyzeBlock((*OI)->getEntry());
+ gatherPredicates(*OI);
- if ((*OI)->isSubRegion())
- continue;
+ // Remember that we've seen this node
+ Visited.insert((*OI)->getEntry());
- // Find the first/last loop nodes and loop predicates
- analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
+ // Find the last back edges
+ analyzeLoops(*OI);
}
}
-/// \brief Does A dominate all the predicates of B ?
-bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
- BBPredicates &Preds = Predicates[B];
- for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions(bool Loops) {
+ BranchVector &Conds = Loops ? LoopConds : Conditions;
+ Value *Default = Loops ? BoolTrue : BoolFalse;
+ SSAUpdater PhiInserter;
- if (!DT->dominates(A, PI->first))
- return false;
- }
- return true;
-}
+ for (BranchVector::iterator I = Conds.begin(),
+ E = Conds.end(); I != E; ++I) {
-/// \brief Remove phi values from all successors and the remove the terminator.
-void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
- TerminatorInst *Term = BB->getTerminator();
- if (!Term)
- return;
+ BranchInst *Term = *I;
+ assert(Term->isConditional());
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
- SI != SE; ++SI) {
+ BasicBlock *Parent = Term->getParent();
+ BasicBlock *SuccTrue = Term->getSuccessor(0);
+ BasicBlock *SuccFalse = Term->getSuccessor(1);
- delPhiValues(BB, *SI);
- }
+ PhiInserter.Initialize(Boolean, "");
+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
+ PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
- Term->eraseFromParent();
-}
+ BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
-/// First: Skip forward to the first region node that either isn't a subregion or not
-/// dominating it's exit, remove all the skipped nodes from the node order.
-///
-/// Second: Handle the first successor directly if the resulting nodes successor
-/// predicates are still dominated by the original entry
-RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
- BasicBlock *Entry = Node->getEntry();
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(Parent, false);
- // Skip forward as long as it is just a linear flow
- while (true) {
- BasicBlock *Entry = Node->getEntry();
- BasicBlock *Exit;
+ Value *ParentValue = 0;
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
- if (Node->isSubRegion()) {
- Exit = Node->getNodeAs<Region>()->getExit();
- } else {
- TerminatorInst *Term = Entry->getTerminator();
- if (Term->getNumSuccessors() != 1)
+ if (PI->first == Parent) {
+ ParentValue = PI->second;
break;
- Exit = Term->getSuccessor(0);
+ }
+ PhiInserter.AddAvailableValue(PI->first, PI->second);
+ Dominator.addBlock(PI->first);
}
- // It's a back edge, break here so we can insert a loop node
- if (!Visited.count(Exit))
- return Node;
-
- // More than node edges are pointing to exit
- if (!DT->dominates(Entry, Exit))
- return Node;
-
- RegionNode *Next = ParentRegion->getNode(Exit);
- RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
- assert(I != Order.end());
-
- Visited.erase(Next->getEntry());
- Order.erase(I);
- Node = Next;
- }
+ if (ParentValue) {
+ Term->setCondition(ParentValue);
+ } else {
+ if (!Dominator.wasResultExplicitMentioned())
+ PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
- BasicBlock *BB = Node->getEntry();
- TerminatorInst *Term = BB->getTerminator();
- if (Term->getNumSuccessors() != 2)
- return Node;
-
- // Our node has exactly two succesors, check if we can handle
- // any of them directly
- BasicBlock *Succ = Term->getSuccessor(0);
- if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
- Succ = Term->getSuccessor(1);
- if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
- return Node;
- } else {
- BasicBlock *Succ2 = Term->getSuccessor(1);
- if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
- dominatesPredicates(Entry, Succ2))
- Succ = Succ2;
+ Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ }
}
-
- RegionNode *Next = ParentRegion->getNode(Succ);
- RNVector::iterator E = Order.end();
- RNVector::iterator I = std::find(Order.begin(), E, Next);
- assert(I != E);
-
- killTerminator(BB);
- FlowsInserted.push_back(BB);
- Visited.erase(Succ);
- Order.erase(I);
- return ParentRegion->getNode(wireFlowBlock(BB, Next));
}
/// \brief Remove all PHI values coming from "From" into "To" and remember
@@ -421,224 +511,306 @@ void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
}
}
-/// \brief Add the PHI values back once we knew the new predecessor
+/// \brief Add a dummy PHI value as soon as we know the new predecessor
void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
- if (!DeletedPhis.count(To))
- return;
+ for (BasicBlock::iterator I = To->begin(), E = To->end();
+ I != E && isa<PHINode>(*I);) {
+
+ PHINode &Phi = cast<PHINode>(*I++);
+ Value *Undef = UndefValue::get(Phi.getType());
+ Phi.addIncoming(Undef, From);
+ }
+ AddedPhis[To].push_back(From);
+}
+
+/// \brief Add the real PHI value as soon as everything is set up
+void AMDGPUStructurizeCFG::setPhiValues() {
- PhiMap &Map = DeletedPhis[To];
SSAUpdater Updater;
+ for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
+ AI != AE; ++AI) {
- for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+ BasicBlock *To = AI->first;
+ BBVector &From = AI->second;
- PHINode *Phi = I->first;
- Updater.Initialize(Phi->getType(), "");
- BasicBlock *Fallback = To;
- bool HaveFallback = false;
+ if (!DeletedPhis.count(To))
+ continue;
- for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
- VI != VE; ++VI) {
+ PhiMap &Map = DeletedPhis[To];
+ for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
+ PI != PE; ++PI) {
- Updater.AddAvailableValue(VI->first, VI->second);
- BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
- if (Dom == VI->first)
- HaveFallback = true;
- else if (Dom != Fallback)
- HaveFallback = false;
- Fallback = Dom;
- }
- if (!HaveFallback) {
+ PHINode *Phi = PI->first;
Value *Undef = UndefValue::get(Phi->getType());
- Updater.AddAvailableValue(Fallback, Undef);
+ Updater.Initialize(Phi->getType(), "");
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(To, Undef);
+
+ NearestCommonDominator Dominator(DT);
+ Dominator.addBlock(To, false);
+ for (BBValueVector::iterator VI = PI->second.begin(),
+ VE = PI->second.end(); VI != VE; ++VI) {
+
+ Updater.AddAvailableValue(VI->first, VI->second);
+ Dominator.addBlock(VI->first);
+ }
+
+ if (!Dominator.wasResultExplicitMentioned())
+ Updater.AddAvailableValue(Dominator.getResult(), Undef);
+
+ for (BBVector::iterator FI = From.begin(), FE = From.end();
+ FI != FE; ++FI) {
+
+ int Idx = Phi->getBasicBlockIndex(*FI);
+ assert(Idx != -1);
+ Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
+ }
+ }
+
+ DeletedPhis.erase(To);
+ }
+ assert(DeletedPhis.empty());
+}
+
+/// \brief Remove phi values from all successors and then remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (!Term)
+ return;
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+ SI != SE; ++SI) {
+
+ delPhiValues(BB, *SI);
+ }
+
+ Term->eraseFromParent();
+}
+
+/// \brief Let node exit(s) point to NewExit
+void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
+ bool IncludeDominator) {
+
+ if (Node->isSubRegion()) {
+ Region *SubRegion = Node->getNodeAs<Region>();
+ BasicBlock *OldExit = SubRegion->getExit();
+ BasicBlock *Dominator = 0;
+
+ // Find all the edges from the sub region to the exit
+ for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
+ I != E;) {
+
+ BasicBlock *BB = *I++;
+ if (!SubRegion->contains(BB))
+ continue;
+
+ // Modify the edges to point to the new exit
+ delPhiValues(BB, OldExit);
+ BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
+ addPhiValues(BB, NewExit);
+
+ // Find the new dominator (if requested)
+ if (IncludeDominator) {
+ if (!Dominator)
+ Dominator = BB;
+ else
+ Dominator = DT->findNearestCommonDominator(Dominator, BB);
+ }
}
- Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
+ // Change the dominator (if requested)
+ if (Dominator)
+ DT->changeImmediateDominator(NewExit, Dominator);
+
+ // Update the region info
+ SubRegion->replaceExit(NewExit);
+
+ } else {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+ killTerminator(BB);
+ BranchInst::Create(NewExit, BB);
+ addPhiValues(BB, NewExit);
+ if (IncludeDominator)
+ DT->changeImmediateDominator(NewExit, BB);
}
- DeletedPhis.erase(To);
}
/// \brief Create a new flow node and update dominator tree and region info
-BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) {
LLVMContext &Context = Func->getContext();
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
Order.back()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
- DT->addNewBlock(Flow, Prev);
+ DT->addNewBlock(Flow, Dominator);
ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
- FlowsInserted.push_back(Flow);
return Flow;
}
+/// \brief Create a new or reuse the previous node as flow node
+BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) {
+
+ BasicBlock *Entry = PrevNode->getEntry();
+
+ if (!PrevNode->isSubRegion()) {
+ killTerminator(Entry);
+ if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
+ return Entry;
+
+ }
+
+ // create a new flow node
+ BasicBlock *Flow = getNextFlow(Entry);
+
+ // and wire it up
+ changeExit(PrevNode, Flow, true);
+ PrevNode = ParentRegion->getBBNode(Flow);
+ return Flow;
+}
+
+/// \brief Returns the region exit if possible, otherwise just a new flow node
+BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow,
+ bool ExitUseAllowed) {
+
+ if (Order.empty() && ExitUseAllowed) {
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
+ }
+ return getNextFlow(Flow);
+}
+
+/// \brief Set the previous node
+void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) {
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+}
+
+/// \brief Does BB dominate all the predicates of Node?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
+ BBPredicates &Preds = Predicates[Node->getEntry()];
+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+
+ if (!DT->dominates(BB, PI->first))
+ return false;
+ }
+ return true;
+}
+
/// \brief Can we predict that this node will always be called?
-bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
- BasicBlock *Node) {
- BBPredicates &Preds = Predicates[Node];
+bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) {
+
+ BBPredicates &Preds = Predicates[Node->getEntry()];
bool Dominated = false;
+ // The region entry is always true
+ if (PrevNode == 0)
+ return true;
+
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
if (I->second != BoolTrue)
return false;
- if (!Dominated && DT->dominates(I->first, Prev))
+ if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
Dominated = true;
}
+
+ // TODO: The dominator check is too strict
return Dominated;
}
-/// \brief Wire up the new control flow by inserting or updating the branch
-/// instructions at node exits
-BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
- RegionNode *Node) {
- BasicBlock *Entry = Node->getEntry();
-
- if (LoopStart == Entry) {
- LoopStart = Prev;
- LoopPred[Prev] = BoolTrue;
- }
+/// Take one node from the order vector and wire it up
+void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
- // Wire it up temporary, skipChained may recurse into us
- BranchInst::Create(Entry, Prev);
- DT->changeImmediateDominator(Entry, Prev);
- addPhiValues(Prev, Entry);
+ RegionNode *Node = Order.pop_back_val();
+ Visited.insert(Node->getEntry());
- Node = skipChained(Node);
+ if (isPredictableTrue(Node)) {
+ // Just a linear flow
+ if (PrevNode) {
+ changeExit(PrevNode, Node->getEntry(), true);
+ }
+ PrevNode = Node;
- BasicBlock *Next = getNextFlow(Prev);
- if (!isPredictableTrue(Prev, Entry)) {
- // Let Prev point to entry and next block
- Prev->getTerminator()->eraseFromParent();
- BranchInst::Create(Entry, Next, BoolUndef, Prev);
} else {
- DT->changeImmediateDominator(Next, Entry);
- }
+ // Insert extra prefix node (or reuse last one)
+ BasicBlock *Flow = needPrefix(false);
- // Let node exit(s) point to next block
- if (Node->isSubRegion()) {
- Region *SubRegion = Node->getNodeAs<Region>();
- BasicBlock *Exit = SubRegion->getExit();
+ // Insert extra postfix node (or use exit instead)
+ BasicBlock *Entry = Node->getEntry();
+ BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
- // Find all the edges from the sub region to the exit
- BBVector ToDo;
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
- if (SubRegion->contains(*I))
- ToDo.push_back(*I);
- }
+ // let it point to entry and next block
+ Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
+ addPhiValues(Flow, Entry);
+ DT->changeImmediateDominator(Entry, Flow);
- // Modify the edges to point to the new flow block
- for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
- delPhiValues(*I, Exit);
- TerminatorInst *Term = (*I)->getTerminator();
- Term->replaceUsesOfWith(Exit, Next);
+ PrevNode = Node;
+ while (!Order.empty() && !Visited.count(LoopEnd) &&
+ dominatesPredicates(Entry, Order.back())) {
+ handleLoops(false, LoopEnd);
}
- // Update the region info
- SubRegion->replaceExit(Next);
-
- } else {
- BasicBlock *BB = Node->getNodeAs<BasicBlock>();
- killTerminator(BB);
- BranchInst::Create(Next, BB);
-
- if (BB == LoopEnd)
- LoopEnd = 0;
+ changeExit(PrevNode, Next, false);
+ setPrevNode(Next);
}
-
- return Next;
}
-/// Destroy node order and visited map, build up flow order instead.
-/// After this function control flow looks like it should be, but
-/// branches only have undefined conditions.
-void AMDGPUStructurizeCFG::createFlow() {
- DeletedPhis.clear();
+void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed,
+ BasicBlock *LoopEnd) {
+ RegionNode *Node = Order.back();
+ BasicBlock *LoopStart = Node->getEntry();
- BasicBlock *Prev = Order.pop_back_val()->getEntry();
- assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
- Visited.erase(Prev);
-
- if (LoopStart == Prev) {
- // Loop starts at entry, split entry so that we can predicate it
- BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
- BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
- DT->addNewBlock(Split, Prev);
- ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
- Predicates[Split] = Predicates[Prev];
- Order.push_back(ParentRegion->getBBNode(Split));
- LoopPred[Prev] = BoolTrue;
-
- } else if (LoopStart == Order.back()->getEntry()) {
- // Loop starts behind entry, split entry so that we can jump to it
- Instruction *Term = Prev->getTerminator();
- BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
- DT->addNewBlock(Split, Prev);
- ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
- Prev = Split;
+ if (!Loops.count(LoopStart)) {
+ wireFlow(ExitUseAllowed, LoopEnd);
+ return;
}
- killTerminator(Prev);
- FlowsInserted.clear();
- FlowsInserted.push_back(Prev);
+ if (!isPredictableTrue(Node))
+ LoopStart = needPrefix(true);
- while (!Order.empty()) {
- RegionNode *Node = Order.pop_back_val();
- Visited.erase(Node->getEntry());
- Prev = wireFlowBlock(Prev, Node);
- if (LoopStart && !LoopEnd) {
- // Create an extra loop end node
- LoopEnd = Prev;
- Prev = getNextFlow(LoopEnd);
- BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
- addPhiValues(LoopEnd, LoopStart);
- }
+ LoopEnd = Loops[Node->getEntry()];
+ wireFlow(false, LoopEnd);
+ while (!Visited.count(LoopEnd)) {
+ handleLoops(false, LoopEnd);
}
- BasicBlock *Exit = ParentRegion->getExit();
- BranchInst::Create(Exit, Prev);
- addPhiValues(Prev, Exit);
- if (DT->dominates(ParentRegion->getEntry(), Exit))
- DT->changeImmediateDominator(Exit, Prev);
-
- if (LoopStart && LoopEnd) {
- BBVector::iterator FI = std::find(FlowsInserted.begin(),
- FlowsInserted.end(),
- LoopStart);
- for (; *FI != LoopEnd; ++FI) {
- addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
- }
- }
-
- assert(Order.empty());
- assert(Visited.empty());
- assert(DeletedPhis.empty());
+ // Create an extra loop end node
+ LoopEnd = needPrefix(false);
+ BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
+ LoopConds.push_back(BranchInst::Create(Next, LoopStart,
+ BoolUndef, LoopEnd));
+ addPhiValues(LoopEnd, LoopStart);
+ setPrevNode(Next);
}
-/// \brief Insert the missing branch conditions
-void AMDGPUStructurizeCFG::insertConditions() {
- SSAUpdater PhiInserter;
-
- for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
- FI != FE; ++FI) {
-
- BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
- if (Term->isUnconditional())
- continue;
+/// After this function control flow looks like it should be, but
+/// branch conditions are still undefined and PHI nodes only carry dummy values.
+void AMDGPUStructurizeCFG::createFlow() {
- PhiInserter.Initialize(Boolean, "");
- PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
+ BasicBlock *Exit = ParentRegion->getExit();
+ bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
- BasicBlock *Succ = Term->getSuccessor(0);
- BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
- for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
+ DeletedPhis.clear();
+ AddedPhis.clear();
+ Conditions.clear();
+ LoopConds.clear();
- PhiInserter.AddAvailableValue(PI->first, PI->second);
- }
+ PrevNode = 0;
+ Visited.clear();
- Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
+ while (!Order.empty()) {
+ handleLoops(EntryDominatesExit, 0);
}
+
+ if (PrevNode)
+ changeExit(PrevNode, Exit, EntryDominatesExit);
+ else
+ assert(EntryDominatesExit);
}
/// Handle a rare case where the disintegrated nodes instructions
@@ -696,14 +868,21 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
orderNodes();
collectInfos();
createFlow();
- insertConditions();
+ insertConditions(false);
+ insertConditions(true);
+ setPhiValues();
rebuildSSA();
+ // Cleanup
Order.clear();
Visited.clear();
- Predicates.clear();
DeletedPhis.clear();
- FlowsInserted.clear();
+ AddedPhis.clear();
+ Predicates.clear();
+ Conditions.clear();
+ Loops.clear();
+ LoopPreds.clear();
+ LoopConds.clear();
return true;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index cab7884..1973fc6 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -44,7 +44,7 @@ public:
virtual ~AMDGPUSubtarget();
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
+ virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool isOverride(AMDGPUDeviceInfo::Caps) const;
bool is64bit() const;
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index d09dc2e..e2f00be 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -102,6 +102,12 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUPeepholeOpt(*TM));
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ // The callbacks this pass uses are not implemented yet on SI.
+ addPass(createAMDGPUIndirectAddressingPass(*TM));
+ }
return false;
}
@@ -116,6 +122,11 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
}
bool AMDGPUPassConfig::addPostRegAlloc() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ addPass(createSIInsertWaits(*TM));
+ }
return false;
}
@@ -132,8 +143,8 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
addPass(&FinalizeMachineBundlesID);
+ addPass(createR600LowerConstCopy(*TM));
} else {
- addPass(createSILowerLiteralConstantsPass(*TM));
addPass(createSILowerControlFlowPass(*TM));
}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 91f9a83..2afe787 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -15,9 +15,9 @@
#ifndef AMDGPU_TARGET_MACHINE_H
#define AMDGPU_TARGET_MACHINE_H
+#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILFrameLowering.h"
#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
#include "llvm/ADT/OwningPtr.h"
diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
index 4e577dc..b39fbdb 100644
--- a/lib/Target/R600/AMDIL.h
+++ b/lib/Target/R600/AMDIL.h
@@ -90,14 +90,30 @@ namespace AMDGPUAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, ///< Address space for private memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+ CONSTANT_ADDRESS = 2, ///< Address space for constant memory
LOCAL_ADDRESS = 3, ///< Address space for local memory.
REGION_ADDRESS = 4, ///< Address space for region memory.
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
- LAST_ADDRESS = 9
+ CONSTANT_BUFFER_0 = 9,
+ CONSTANT_BUFFER_1 = 10,
+ CONSTANT_BUFFER_2 = 11,
+ CONSTANT_BUFFER_3 = 12,
+ CONSTANT_BUFFER_4 = 13,
+ CONSTANT_BUFFER_5 = 14,
+ CONSTANT_BUFFER_6 = 15,
+ CONSTANT_BUFFER_7 = 16,
+ CONSTANT_BUFFER_8 = 17,
+ CONSTANT_BUFFER_9 = 18,
+ CONSTANT_BUFFER_10 = 19,
+ CONSTANT_BUFFER_11 = 20,
+ CONSTANT_BUFFER_12 = 21,
+ CONSTANT_BUFFER_13 = 22,
+ CONSTANT_BUFFER_14 = 23,
+ CONSTANT_BUFFER_15 = 24,
+ LAST_ADDRESS = 25
};
} // namespace AMDGPUAS
diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h
index b9a1560..97df98c 100644
--- a/lib/Target/R600/AMDILDevice.h
+++ b/lib/Target/R600/AMDILDevice.h
@@ -104,7 +104,7 @@ public:
static const unsigned int QuarterWavefrontSize = 16;
protected:
virtual void setCaps();
- llvm::BitVector mHWBits;
+ BitVector mHWBits;
llvm::BitVector mSWBits;
AMDGPUSubtarget *mSTM;
uint32_t DeviceFlag;
diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp
deleted file mode 100644
index 9ad495a..0000000
--- a/lib/Target/R600/AMDILFrameLowering.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief Interface to describe a layout of a stack frame on a AMDGPU target
-/// machine.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILFrameLowering.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-
-using namespace llvm;
-AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
- int LAO, unsigned TransAl)
- : TargetFrameLowering(D, StackAl, LAO, TransAl) {
-}
-
-AMDGPUFrameLowering::~AMDGPUFrameLowering() {
-}
-
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI);
-}
-
-const TargetFrameLowering::SpillSlot *
-AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- NumEntries = 0;
- return 0;
-}
-void
-AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
-}
-void
-AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-}
-bool
-AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const {
- return false;
-}
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index d15ed39..e77b9dc 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -16,10 +16,12 @@
#include "AMDGPURegisterInfo.h"
#include "AMDILDevices.h"
#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include <list>
#include <queue>
@@ -42,9 +44,11 @@ public:
SDNode *Select(SDNode *N);
virtual const char *getPassName() const;
+ virtual void PostprocessISelDAG();
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
+ bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@@ -67,9 +71,11 @@ private:
static bool isLocalLoad(const LoadSDNode *N);
static bool isRegionLoad(const LoadSDNode *N);
- bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
- bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg, SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -156,16 +162,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
- case ISD::FrameIndex: {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
- unsigned int FI = FIN->getIndex();
- EVT OpVT = N->getValueType(0);
- unsigned int NewOpc = AMDGPU::COPY;
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
- }
- break;
- }
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
@@ -224,7 +220,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
continue;
}
} else {
- if (!TII->isALUInstr(Use->getMachineOpcode())) {
+ if (!TII->isALUInstr(Use->getMachineOpcode()) ||
+ (TII->get(Use->getMachineOpcode()).TSFlags &
+ R600_InstFlag::VECTOR)) {
continue;
}
@@ -259,7 +257,116 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
}
- return SelectCode(N);
+ SDNode *Result = SelectCode(N);
+
+ // Fold operands of selected node
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ if (Result && Result->isMachineOpcode() &&
+ !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
+ && TII->isALUInstr(Result->getMachineOpcode())) {
+ // Fold FNEG/FABS/CONST_ADDRESS
+ // TODO: ISel can generate multiple MachineInsts, so we need to recursively
+ // parse Result
+ bool IsModified = false;
+ do {
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+ IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
+ if (IsModified) {
+ Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
+ }
+ } while (IsModified);
+
+ // If the node has a single use which is CLAMP_R600, fold it
+ if (Result->hasOneUse() && Result->isMachineOpcode()) {
+ SDNode *PotentialClamp = *Result->use_begin();
+ if (PotentialClamp->isMachineOpcode() &&
+ PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
+ unsigned ClampIdx =
+ TII->getOperandIdx(Result->getMachineOpcode(), R600Operands::CLAMP);
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Result->getNumOperands();
+ for (unsigned i = 0; i < NumOp; ++i) {
+ Ops.push_back(Result->getOperand(i));
+ }
+ Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ Result = CurDAG->SelectNodeTo(PotentialClamp,
+ Result->getMachineOpcode(), PotentialClamp->getVTList(),
+ Ops.data(), NumOp);
+ }
+ }
+ }
+ }
+
+ return Result;
+}
+
+bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
+ const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2)
+ };
+ int SelIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG),
+ TII->getOperandIdx(Opcode, R600Operands::SRC2_NEG)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS),
+ TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS),
+ -1
+ };
+
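+ // The operand tables give MachineInstr operand indices (operand 0 is the
+ // destination), while Ops only holds the node's source operands, hence
+ // the -1 adjustment below.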
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return false;
+ SDValue Operand = Ops[OperandIdx[i] - 1];
+ switch (Operand.getOpcode()) {
+ case AMDGPUISD::CONST_ADDRESS: {
+ if (i == 2)
+ break;
+ SDValue CstOffset;
+ if (!Operand.getValueType().isVector() &&
+ SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
+ }
+ }
+ break;
+ case ISD::FNEG:
+ if (NegIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::FABS:
+ if (AbsIdx[i] < 0)
+ break;
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
+ return true;
+ case ISD::BITCAST:
+ Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
@@ -406,41 +513,23 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
///==== AMDGPU Functions ====///
-bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
- SDValue& Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress) {
- return false;
+bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
+ return true;
}
+ return false;
+}
-
- if (Addr.getOpcode() == ISD::ADD) {
- bool Match = false;
-
- // Find the base ptr and the offset
- for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
- SDValue Arg = Addr.getOperand(i);
- ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
- // This arg isn't a constant so it must be the base PTR.
- if (!OffsetNode) {
- Base = Addr.getOperand(i);
- continue;
- }
- // Check if the constant argument fits in 8-bits. The offset is in bytes
- // so we need to convert it to dwords.
- if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
- Match = true;
- Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
- MVT::i32);
- }
- }
- return Match;
+bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!dyn_cast<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, true);
+ return true;
}
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ return false;
}
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
@@ -470,16 +559,39 @@ bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
return true;
}
-bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
- SDValue& Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress ||
- Addr.getOpcode() != ISD::ADD) {
- return false;
+bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+
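+ // Three forms are handled: a plain constant (an offset from the implicit
+ // indirect base register), a register plus a constant offset, and a bare
+ // register with a zero offset.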
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
}
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
-
return true;
}
+
+void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
+
+ // Go over all selected nodes and try to fold them a bit more
+ const AMDGPUTargetLowering& Lowering = ((const AMDGPUTargetLowering&)TLI);
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+
+ MachineSDNode *Node = dyn_cast<MachineSDNode>(I);
+ if (!Node)
+ continue;
+
+ SDNode *ResNode = Lowering.PostISelFolding(Node, *CurDAG);
+ if (ResNode != Node)
+ ReplaceUses(Node, ResNode);
+ }
+}
+
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
index 2e60adc..f65e1f3 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -220,9 +220,9 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
setSelectIsExpensive(true);
setJumpIsExpensive(true);
- maxStoresPerMemcpy = 4096;
- maxStoresPerMemmove = 4096;
- maxStoresPerMemset = 4096;
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
}
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
index e969bbf..110f147 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -116,7 +116,6 @@ def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
//===----------------------------------------------------------------------===//
// Integer functions
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
index 3f9e20f..6ec3559 100644
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
TernaryIntInt;
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
BinaryIntInt;
- def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
- TernaryIntInt;
- def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
- TernaryIntInt;
- def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
- TernaryIntFloat;
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
@@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
BinaryIntInt;
- def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
- TernaryIntInt;
- def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
- TernaryIntInt;
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
index a3d30af..3a28038 100644
--- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp
+++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
@@ -366,7 +366,7 @@ AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
std::string buffer(F->getName().str() + "_noret");
F = dyn_cast<Function>(
F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
- atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
+ atomicFuncs.push_back(std::make_pair(CI, F));
}
if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
@@ -613,7 +613,7 @@ AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
+ getOrInsertFunction(StringRef(name), funcType));
Value *Operands[4] = {
width,
offset,
@@ -777,7 +777,7 @@ AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
// Lets create the function.
Function *Func =
dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
+ getOrInsertFunction(StringRef(name), funcType));
Value *Operands[3] = {
ShiftInst->getOperand(0),
shiftValConst,
@@ -967,7 +967,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
}
Function *Func = dyn_cast<Function>(
CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
+ getOrInsertFunction(StringRef(name), funcType));
Value *Operands[3] = {
CI->getOperand(0),
CI->getOperand(1),
@@ -999,7 +999,7 @@ AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
}
Function *Func = dyn_cast<Function>(
CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(llvm::StringRef(name), funcType));
+ getOrInsertFunction(StringRef(name), funcType));
Value *Operands[2] = {
CI->getOperand(0),
CI->getOperand(1)
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index ce0b56b..00f8b10 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
AMDILDevice.cpp
AMDILDeviceInfo.cpp
AMDILEvergreenDevice.cpp
- AMDILFrameLowering.cpp
AMDILIntrinsicInfo.cpp
AMDILISelDAGToDAG.cpp
AMDILISelLowering.cpp
@@ -25,6 +24,8 @@ add_llvm_target(R600CodeGen
AMDILPeepholeOptimizer.cpp
AMDILSIDevice.cpp
AMDGPUAsmPrinter.cpp
+ AMDGPUFrameLowering.cpp
+ AMDGPUIndirectAddressing.cpp
AMDGPUMCInstLower.cpp
AMDGPUSubtarget.cpp
AMDGPUStructurizeCFG.cpp
@@ -36,13 +37,14 @@ add_llvm_target(R600CodeGen
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
+ R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
SIAssignInterpRegs.cpp
+ SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
- SILowerLiteralConstants.cpp
SILowerControlFlow.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index e6c550b..10547a5 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -11,6 +11,7 @@
#include "AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
using namespace llvm;
@@ -35,11 +36,29 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << Op.getImm();
} else if (Op.isFPImm()) {
O << Op.getFPImm();
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O);
} else {
assert(!"unknown operand type in printOperand");
}
}
+void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNum).getImm();
+
+ if (Imm == 2) {
+ O << "P0";
+ } else if (Imm == 1) {
+ O << "P20";
+ } else if (Imm == 0) {
+ O << "P10";
+ } else {
+ assert(!"Invalid interpolation parameter slot");
+ }
+}
+
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
printOperand(MI, OpNo, O);
@@ -105,10 +124,7 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.getImm() != 0) {
- O << " + " << Op.getImm();
- }
+ printIfSet(MI, OpNo, O, "+");
}
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
@@ -129,4 +145,28 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const char * chans = "XYZW";
+ int sel = MI->getOperand(OpNo).getImm();
+
+ int chan = sel & 3;
+ sel >>= 2;
+
+ if (sel >= 512) {
+ sel -= 512;
+ int cb = sel >> 12;
+ sel &= 4095;
+ O << cb << "[" << sel << "]";
+ } else if (sel >= 448) {
+ sel -= 448;
+ O << sel;
+ } else if (sel >= 0){
+ O << sel;
+ }
+
+ if (sel >= 0)
+ O << "." << chans[chan];
+}
+
#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 96e0e46..767a708 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -33,6 +33,7 @@ public:
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -45,6 +46,7 @@ private:
void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 9d0d6cf..8721f80 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -42,17 +42,6 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
return 0;
}
- virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
- return Value;
- }
- virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
- virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return 0;
- }
};
} // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 36deae9..d207160 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -38,8 +38,8 @@ using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
+ R600MCCodeEmitter(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const R600MCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
const MCSubtargetInfo &STI;
@@ -63,8 +63,8 @@ private:
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
- void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
- raw_ostream &OS) const;
+ void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
+ raw_ostream &OS) const;
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const;
@@ -161,9 +161,12 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::VTX_READ_PARAM_8_eg:
case AMDGPU::VTX_READ_PARAM_16_eg:
case AMDGPU::VTX_READ_PARAM_32_eg:
+ case AMDGPU::VTX_READ_PARAM_128_eg:
case AMDGPU::VTX_READ_GLOBAL_8_eg:
case AMDGPU::VTX_READ_GLOBAL_32_eg:
- case AMDGPU::VTX_READ_GLOBAL_128_eg: {
+ case AMDGPU::VTX_READ_GLOBAL_128_eg:
+ case AMDGPU::TEX_VTX_CONSTBUF:
+ case AMDGPU::TEX_VTX_TEXBUF: {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
@@ -193,7 +196,6 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
raw_ostream &OS) const {
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
- unsigned NumOperands = MI.getNumOperands();
// Emit instruction type
EmitByte(INSTR_ALU, OS);
@@ -209,19 +211,21 @@ void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
InstWord01 |= ISAOpCode << 1;
}
- unsigned SrcIdx = 0;
- for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
- if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
- OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
- continue;
- }
- EmitSrcISA(MI, OpIdx, InstWord01, OS);
- SrcIdx++;
- }
+ unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
+ MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
- // Emit zeros for unused sources
- for ( ; SrcIdx < 3; SrcIdx++) {
- EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
+ EmitByte(SrcNum, OS);
+
+ const unsigned SrcOps[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL}
+ };
+
+ for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
+ unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
+ unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
+ EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
}
Emit(InstWord01, OS);
@@ -292,34 +296,37 @@ void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
}
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
- uint64_t &Value, raw_ostream &OS) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
+void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
+ unsigned SelOpIdx, raw_ostream &OS) const {
+ const MCOperand &RegMO = MI.getOperand(RegOpIdx);
+ const MCOperand &SelMO = MI.getOperand(SelOpIdx);
+
union {
float f;
uint32_t i;
} InlineConstant;
InlineConstant.i = 0;
- // Emit the source select (2 bytes). For GPRs, this is the register index.
- // For other potential instruction operands, (e.g. constant registers) the
- // value of the source select is defined in the r600isa docs.
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
+ // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
+ // and select is 0 (GPR index is encoded in the instr encoding). For constants
+ // type is 1 and select is the original const select passed from the driver.
+ unsigned Reg = RegMO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ EmitByte(1, OS);
+ uint32_t Sel = SelMO.getImm();
+ Emit(Sel, OS);
+ } else {
+ EmitByte(0, OS);
+ Emit((uint32_t)0, OS);
+ }
- if (Reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- InlineConstant.f = ImmOp.getFPImm();
- } else {
- assert(ImmOp.isImm());
- InlineConstant.i = ImmOp.getImm();
- }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned ImmOpIndex = MI.getNumOperands() - 1;
+ MCOperand ImmOp = MI.getOperand(ImmOpIndex);
+ if (ImmOp.isFPImm()) {
+ InlineConstant.f = ImmOp.getFPImm();
+ } else {
+ assert(ImmOp.isImm());
+ InlineConstant.i = ImmOp.getImm();
}
}
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index b4bdb25..6cc0077 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -24,46 +24,33 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
-#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
-#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
-
-// These must be kept in sync with SIInstructions.td and also the
-// InstrEncodingInfo array in SIInstrInfo.cpp.
-//
-// NOTE: This enum is only used to identify the encoding type within LLVM,
-// the actual encoding type that is part of the instruction format is different
-namespace SIInstrEncodingType {
- enum Encoding {
- EXP = 0,
- LDS = 1,
- MIMG = 2,
- MTBUF = 3,
- MUBUF = 4,
- SMRD = 5,
- SOP1 = 6,
- SOP2 = 7,
- SOPC = 8,
- SOPK = 9,
- SOPP = 10,
- VINTRP = 11,
- VOP1 = 12,
- VOP2 = 13,
- VOP3 = 14,
- VOPC = 15
- };
-}
-
using namespace llvm;
namespace {
+
+/// \brief Helper type used in encoding
+typedef union {
+ int32_t I;
+ float F;
+} IntFloatUnion;
+
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
+ SIMCCodeEmitter(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
const MCSubtargetInfo &STI;
MCContext &Ctx;
+ /// \brief Encode a sequence of registers with the correct alignment.
+ unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
+
+ /// \brief Can this operand also contain immediate values?
+ bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
+
+ /// \brief Encode an fp or int literal
+ uint32_t getLitEncoding(const MCOperand &MO) const;
+
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
const MCSubtargetInfo &sti, MCContext &ctx)
@@ -79,11 +66,6 @@ public:
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const;
-public:
-
- /// \brief Encode a sequence of registers with the correct alignment.
- unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
-
/// \brief Encoding for when 2 consecutive registers are used
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
@@ -91,73 +73,142 @@ public:
/// \brief Encoding for when 4 consecutive registers are used
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixup) const;
+};
- /// \brief Encoding for SMRD indexed loads
- virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const;
+} // End anonymous namespace
+
+MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+}
- /// \brief Post-Encoder method for VOP instructions
- virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
+bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
+ unsigned OpNo) const {
-private:
+ unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
+ return (AMDGPU::SSrc_32RegClassID == RegClass) ||
+ (AMDGPU::SSrc_64RegClassID == RegClass) ||
+ (AMDGPU::VSrc_32RegClassID == RegClass) ||
+ (AMDGPU::VSrc_64RegClassID == RegClass);
+}
- /// \returns this SIInstrEncodingType for this instruction.
- unsigned getEncodingType(const MCInst &MI) const;
+uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const {
- /// \brief Get then size in bytes of this instructions encoding.
- unsigned getEncodingBytes(const MCInst &MI) const;
+ IntFloatUnion Imm;
+ if (MO.isImm())
+ Imm.I = MO.getImm();
+ else if (MO.isFPImm())
+ Imm.F = MO.getFPImm();
+ else
+ return ~0;
- /// \returns the hardware encoding for a register
- unsigned getRegBinaryCode(unsigned reg) const;
+ if (Imm.I >= 0 && Imm.I <= 64)
+ return 128 + Imm.I;
- /// \brief Generated function that returns the hardware encoding for
- /// a register
- unsigned getHWRegNum(unsigned reg) const;
+ if (Imm.I >= -16 && Imm.I <= -1)
+ return 192 + abs(Imm.I);
-};
+ if (Imm.F == 0.5f)
+ return 240;
-} // End anonymous namespace
+ if (Imm.F == -0.5f)
+ return 241;
-MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new SIMCCodeEmitter(MCII, MRI, STI, Ctx);
+ if (Imm.F == 1.0f)
+ return 242;
+
+ if (Imm.F == -1.0f)
+ return 243;
+
+ if (Imm.F == 2.0f)
+ return 244;
+
+ if (Imm.F == -2.0f)
+ return 245;
+
+ if (Imm.F == 4.0f)
+ return 246;
+
+ if (Imm.F == -4.0f)
+ return 247;
+
+ return 255;
}
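
A few concrete values from the mapping above, for illustration:

  // getLitEncoding(MCOperand::CreateImm(13))    -> 141  (128 + 13)
  // getLitEncoding(MCOperand::CreateImm(-4))    -> 196  (192 + |-4|)
  // getLitEncoding(MCOperand::CreateFPImm(2.0)) -> 244
  // getLitEncoding(MCOperand::CreateImm(100))   -> 255  (needs a literal dword)
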
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
+
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
- unsigned bytes = getEncodingBytes(MI);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
for (unsigned i = 0; i < bytes; i++) {
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
}
+
+ if (bytes > 4)
+ return;
+
+ // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!isSrcOperand(Desc, i))
+ continue;
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ if (getLitEncoding(Op) != 255)
+ continue;
+
+ // Yes! Encode it
+ IntFloatUnion Imm;
+ if (Op.isImm())
+ Imm.I = Op.getImm();
+ else
+ Imm.F = Op.getFPImm();
+
+ for (unsigned j = 0; j < 4; j++) {
+ OS.write((uint8_t) ((Imm.I >> (8 * j)) & 0xff));
+ }
+
+ // Only one literal value allowed
+ break;
+ }
}
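
For example, a 4-byte encoded instruction whose source operand is the
immediate 0x11223344 has no inline encoding (getLitEncoding returns 255), so
the loop above appends one extra dword, least-significant byte first, and the
output grows to 8 bytes:

  // <4 instruction bytes> 0x44 0x33 0x22 0x11
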
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (MO.isReg()) {
- return getRegBinaryCode(MO.getReg());
- } else if (MO.isImm()) {
- return MO.getImm();
- } else if (MO.isFPImm()) {
- // XXX: Not all instructions can use inline literals
- // XXX: We should make sure this is a 32-bit constant
- union {
- float F;
- uint32_t I;
- } Imm;
- Imm.F = MO.getFPImm();
- return Imm.I;
- } else if (MO.isExpr()) {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+
+ if (MO.isExpr()) {
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(FK_PCRel_4);
Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
return 0;
- } else{
- llvm_unreachable("Encoding of this operand type is not supported yet.");
}
+
+ // Figure out the operand number, needed for isSrcOperand check
+ unsigned OpNo = 0;
+ for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
+ if (&MO == &MI.getOperand(OpNo))
+ break;
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (isSrcOperand(Desc, OpNo)) {
+ uint32_t Enc = getLitEncoding(MO);
+ if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ return Enc;
+
+ } else if (MO.isImm())
+ return MO.getImm();
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
return 0;
}
@@ -167,10 +218,10 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
unsigned shift) const {
- unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
- return regCode >> shift;
- return 0;
+ unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg());
+ return (regCode & 0xff) >> shift;
}
+
unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
unsigned OpNo ,
SmallVectorImpl<MCFixup> &Fixup) const {
@@ -182,117 +233,3 @@ unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixup) const {
return GPRAlign(MI, OpNo, 2);
}
-
-#define SMRD_OFFSET_MASK 0xff
-#define SMRD_IMM_SHIFT 8
-#define SMRD_SBASE_MASK 0x3f
-#define SMRD_SBASE_SHIFT 9
-/// This function is responsibe for encoding the offset
-/// and the base ptr for SMRD instructions it should return a bit string in
-/// this format:
-///
-/// OFFSET = bits{7-0}
-/// IMM = bits{8}
-/// SBASE = bits{14-9}
-///
-uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixup) const {
- uint32_t Encoding;
-
- const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
-
- //XXX: Use this function for SMRD loads with register offsets
- assert(OffsetOp.isImm());
-
- Encoding =
- (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
- | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
- | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
- ;
-
- return Encoding;
-}
-
-//===----------------------------------------------------------------------===//
-// Post Encoder Callbacks
-//===----------------------------------------------------------------------===//
-
-uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
- unsigned encodingType = getEncodingType(MI);
- unsigned numSrcOps;
- unsigned vgprBitOffset;
-
- if (encodingType == SIInstrEncodingType::VOP3) {
- numSrcOps = 3;
- vgprBitOffset = 32;
- } else {
- numSrcOps = 1;
- vgprBitOffset = 0;
- }
-
- // Add one to skip over the destination reg operand.
- for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
- const MCOperand &MO = MI.getOperand(opIdx);
- if (MO.isReg()) {
- unsigned reg = MI.getOperand(opIdx).getReg();
- if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
- AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
- Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
- }
- } else if (MO.isFPImm()) {
- union {
- float f;
- uint32_t i;
- } Imm;
- // XXX: Not all instructions can use inline literals
- // XXX: We should make sure this is a 32-bit constant
- Imm.f = MO.getFPImm();
- Value |= ((uint64_t)Imm.i) << 32;
- }
- }
- return Value;
-}
-
-//===----------------------------------------------------------------------===//
-// Encoding helper functions
-//===----------------------------------------------------------------------===//
-
-unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
- return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
-}
-
-unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
-
- // These instructions aren't real instructions with an encoding type, so
- // we need to manually specify their size.
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_LOAD_LITERAL_I32:
- case AMDGPU::SI_LOAD_LITERAL_F32:
- return 4;
- }
-
- unsigned encoding_type = getEncodingType(MI);
- switch (encoding_type) {
- case SIInstrEncodingType::EXP:
- case SIInstrEncodingType::LDS:
- case SIInstrEncodingType::MUBUF:
- case SIInstrEncodingType::MTBUF:
- case SIInstrEncodingType::MIMG:
- case SIInstrEncodingType::VOP3:
- return 8;
- default:
- return 4;
- }
-}
-
-
-unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
- switch (reg) {
- case AMDGPU::M0: return 124;
- case AMDGPU::SREG_LIT_0: return 128;
- case AMDGPU::SI_LITERAL_CONSTANT: return 255;
- default: return MRI.getEncodingValue(reg);
- }
-}
-
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 3dc1ecd..868810c 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -13,6 +13,7 @@
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
: Processor<Name, itin, Features>;
+def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
def : Proc<"rv710", R600_EG_Itin, []>;
def : Proc<"rv730", R600_EG_Itin, []>;
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 7dea8e4..16cfcf5 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -49,6 +49,9 @@ namespace R600_InstFlag {
#define HW_REG_MASK 0x1ff
#define HW_CHAN_SHIFT 9
+#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
+#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+
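
A quick illustration of the two macros above, assuming the hardware encoding
packs the channel above the register index ((chan << HW_CHAN_SHIFT) | index):

  // GET_REG_INDEX(0x205) == 5   (0x205 & 0x1ff)
  // GET_REG_CHAN(0x205)  == 1   (0x205 >> 9)
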
namespace R600Operands {
enum Ops {
DST,
@@ -62,18 +65,33 @@ namespace R600Operands {
SRC0_NEG,
SRC0_REL,
SRC0_ABS,
+ SRC0_SEL,
SRC1,
SRC1_NEG,
SRC1_REL,
SRC1_ABS,
+ SRC1_SEL,
SRC2,
SRC2_NEG,
SRC2_REL,
+ SRC2_SEL,
LAST,
PRED_SEL,
IMM,
COUNT
};
+
+ const static int ALUOpTable[3][R600Operands::COUNT] = {
+// W C S S S S S S S S S S S
+// R O D L S R R R R S R R R R S R R R L P
+// D U I M R A R C C C C R C C C C R C C C A R I
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+ };
+
}
#endif // R600DEFINES_H_
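
A minimal usage sketch for the table above, given the operand order of the
enum (row 0 serves OP1-style instructions, row 1 OP2, row 2 OP3):

  // Two-source (OP2) ALU instruction:
  //   ALUOpTable[1][R600Operands::SRC1]     == 12   // machine operand of src1
  //   ALUOpTable[1][R600Operands::SRC1_SEL] == 16   // its constant select
  // Three-source (OP3) instructions carry no abs modifiers, so:
  //   ALUOpTable[2][R600Operands::SRC0_ABS] == -1
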
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index b903d4a..f8c900f 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -55,118 +55,6 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
return new R600ExpandSpecialInstrsPass(TM);
}
-bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- if (MI.getOpcode() != AMDGPU::input_perspective)
- return false;
-
- MachineBasicBlock::iterator I = &MI;
- unsigned DstReg = MI.getOperand(0).getReg();
- R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
- ->getInfo<R600MachineFunctionInfo>();
- unsigned IJIndexBase;
-
- // In Evergreen ISA doc section 8.3.2 :
- // We need to interpolate XY and ZW in two different instruction groups.
- // An INTERP_* must occupy all 4 slots of an instruction group.
- // Output of INTERP_XY is written in X,Y slots
- // Output of INTERP_ZW is written in Z,W slots
- //
- // Thus interpolation requires the following sequences :
- //
- // AnyGPR.x = INTERP_ZW; (Write Masked Out)
- // AnyGPR.y = INTERP_ZW; (Write Masked Out)
- // DstGPR.z = INTERP_ZW;
- // DstGPR.w = INTERP_ZW; (End of first IG)
- // DstGPR.x = INTERP_XY;
- // DstGPR.y = INTERP_XY;
- // AnyGPR.z = INTERP_XY; (Write Masked Out)
- // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
- //
- switch (MI.getOperand(1).getImm()) {
- case 0:
- IJIndexBase = MFI->GetIJPerspectiveIndex();
- break;
- case 1:
- IJIndexBase = MFI->GetIJLinearIndex();
- break;
- default:
- assert(0 && "Unknow ij index");
- }
-
- for (unsigned i = 0; i < 8; i++) {
- unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
- 2 * IJIndexBase + ((i + 1) % 2));
- unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(2).getImm());
-
-
- unsigned Sel = AMDGPU::sel_x;
- switch (i % 4) {
- case 0:Sel = AMDGPU::sel_x;break;
- case 1:Sel = AMDGPU::sel_y;break;
- case 2:Sel = AMDGPU::sel_z;break;
- case 3:Sel = AMDGPU::sel_w;break;
- default:break;
- }
-
- unsigned Res = TRI.getSubReg(DstReg, Sel);
-
- unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY;
-
- MachineBasicBlock &MBB = *(MI.getParent());
- MachineInstr *NewMI =
- TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);
-
- if (!(i> 1 && i < 6)) {
- TII->addFlag(NewMI, 0, MO_FLAG_MASK);
- }
-
- if (i % 4 != 3)
- TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
-
- return true;
-}
-
-bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- if (MI.getOpcode() != AMDGPU::input_constant)
- return false;
-
- MachineBasicBlock::iterator I = &MI;
- unsigned DstReg = MI.getOperand(0).getReg();
-
- for (unsigned i = 0; i < 4; i++) {
- unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(1).getImm());
-
- unsigned Sel = AMDGPU::sel_x;
- switch (i % 4) {
- case 0:Sel = AMDGPU::sel_x;break;
- case 1:Sel = AMDGPU::sel_y;break;
- case 2:Sel = AMDGPU::sel_z;break;
- case 3:Sel = AMDGPU::sel_w;break;
- default:break;
- }
-
- unsigned Res = TRI.getSubReg(DstReg, Sel);
-
- MachineBasicBlock &MBB = *(MI.getParent());
- MachineInstr *NewMI = TII->buildDefaultInstruction(
- MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);
-
- if (i % 4 != 3)
- TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
-
- return true;
-}
-
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
@@ -200,7 +88,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
continue;
}
- case AMDGPU::BREAK:
+ case AMDGPU::BREAK: {
MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
AMDGPU::PRED_SETE_INT,
AMDGPU::PREDICATE_BIT,
@@ -214,12 +102,87 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
.addReg(AMDGPU::PREDICATE_BIT);
MI.eraseFromParent();
continue;
- }
+ }
- if (ExpandInputPerspective(MI))
- continue;
- if (ExpandInputConstant(MI))
- continue;
+ case AMDGPU::INTERP_PAIR_XY: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = MI.getOperand(Chan).getReg();
+ else
+ DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan >= 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_PAIR_ZW: {
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(2).getImm());
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ unsigned DstReg;
+
+ if (Chan < 2)
+ DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
+ else
+ DstReg = MI.getOperand(Chan-2).getReg();
+
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
+ DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
+
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan < 2)
+ TII->addFlag(BMI, 0, MO_FLAG_MASK);
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+
+ case AMDGPU::INTERP_VEC_LOAD: {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ MachineInstr *BMI;
+ unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
+ MI.getOperand(1).getImm());
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ for (unsigned Chan = 0; Chan < 4; ++Chan) {
+ BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
+ TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
+ if (Chan > 0) {
+ BMI->bundleWithPred();
+ }
+ if (Chan != 3)
+ TII->addFlag(BMI, 0, MO_FLAG_NOT_LAST);
+ }
+
+ MI.eraseFromParent();
+ continue;
+ }
+ }
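
Roughly, the INTERP_PAIR_XY case above turns one pseudo instruction into a
group of four bundled INTERP_XY slots (a sketch, with illustrative names):

  // dst_x = INTERP_XY op(3), PARAM_BASE   ; NOT_LAST
  // dst_y = INTERP_XY op(4), PARAM_BASE   ; NOT_LAST, bundled
  // T0.Z  = INTERP_XY op(3), PARAM_BASE   ; write-masked, NOT_LAST, bundled
  // T0.W  = INTERP_XY op(4), PARAM_BASE   ; write-masked, bundled
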
bool IsReduction = TII->isReductionOp(MI.getOpcode());
bool IsVector = TII->isVector(MI);
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index f0eece3..b5c2a93 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -71,10 +72,27 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ // Legalize loads and stores to the private address space.
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::SELECT_CC);
setSchedulingPreference(Sched::VLIW);
}
@@ -115,15 +133,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
- case AMDGPU::R600_LOAD_CONST: {
- int64_t RegIndex = MI->getOperand(1).getImm();
- unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
- .addOperand(MI->getOperand(0))
- .addReg(ConstantReg);
- break;
- }
-
case AMDGPU::MASK_WRITE: {
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
@@ -154,18 +163,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
- case AMDGPU::RESERVE_REG: {
- R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
- int64_t ReservedIndex = MI->getOperand(0).getImm();
- unsigned ReservedReg =
- AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
- MFI->ReservedRegs.push_back(ReservedReg);
- unsigned SuperReg =
- AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
- MFI->ReservedRegs.push_back(SuperReg);
- break;
- }
-
case AMDGPU::TXD: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
@@ -250,33 +247,26 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
- case AMDGPU::input_perspective: {
- R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
-
- // XXX Be more fine about register reservation
- for (unsigned i = 0; i < 4; i ++) {
- unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
- MFI->ReservedRegs.push_back(ReservedReg);
- }
-
- switch (MI->getOperand(1).getImm()) {
- case 0:// Perspective
- MFI->HasPerspectiveInterpolation = true;
- break;
- case 1:// Linear
- MFI->HasLinearInterpolation = true;
- break;
- default:
- assert(0 && "Unknow ij index");
- }
-
- return BB;
- }
-
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: {
+ // Instruction is left unmodified if it is not the last one of its type
+ bool isLastInstructionOfItsType = true;
+ unsigned InstExportType = MI->getOperand(1).getImm();
+ for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ EndBlock = BB->end(); NextExportInst != EndBlock;
+ NextExportInst = llvm::next(NextExportInst)) {
+ if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
+ NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ unsigned CurrentInstExportType = NextExportInst->getOperand(1)
+ .getImm();
+ if (CurrentInstExportType == InstExportType) {
+ isLastInstructionOfItsType = false;
+ break;
+ }
+ }
+ }
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
- if (!EOP)
+ if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
@@ -288,9 +278,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addOperand(MI->getOperand(5))
.addOperand(MI->getOperand(6))
.addImm(CfInst)
- .addImm(1);
+ .addImm(EOP);
break;
}
+ case AMDGPU::RETURN: {
+ // RETURN instructions must have the live-out registers as implicit uses,
+ // otherwise they appear dead.
+ R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ MachineInstrBuilder MIB(*MF, MI);
+ for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
+ MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
+ return BB;
+ }
}
MI->eraseFromParent();
@@ -304,57 +303,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;
-static SDValue
-InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
- unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
- SDValue Scalar, SDValue Chain) {
- if (!ExportMap[Slot]) {
- SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
- DL, MVT::v4f32,
- DAG.getUNDEF(MVT::v4f32),
- Scalar,
- DAG.getConstant(Channel, MVT::i32));
-
- unsigned Mask = 1 << Channel;
-
- const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
- DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
- DAG.getConstant(Mask, MVT::i32)};
-
- SDValue Res = DAG.getNode(
- AMDGPUISD::EXPORT,
- DL,
- MVT::Other,
- Ops, 6);
- ExportMap[Slot] = Res.getNode();
- return Res;
- }
-
- SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
- SDValue PreviousVector = ExportInstruction->getOperand(1);
- SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
- DL, MVT::v4f32,
- PreviousVector,
- Scalar,
- DAG.getConstant(Channel, MVT::i32));
-
- unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
- ->getZExtValue();
- Mask |= (1 << Channel);
-
- const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
- DAG.getConstant(Inst, MVT::i32),
- DAG.getConstant(Type, MVT::i32),
- DAG.getConstant(Slot, MVT::i32),
- DAG.getConstant(Mask, MVT::i32)};
-
- DAG.UpdateNodeOperands(ExportInstruction,
- Ops, 6);
-
- return Chain;
-
-}
-
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
@@ -364,7 +312,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::FPOW: return LowerFPOW(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
@@ -372,58 +322,27 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
switch (IntrinsicID) {
case AMDGPUIntrinsic::AMDGPU_store_output: {
MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- if (!MRI.isLiveOut(Reg)) {
- MRI.addLiveOut(Reg);
- }
+ MFI->LiveOuts.push_back(Reg);
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
}
- case AMDGPUIntrinsic::R600_store_pixel_color: {
- MachineFunction &MF = DAG.getMachineFunction();
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-
- SDNode **OutputsMap = MFI->Outputs;
- return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
- RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
- Chain);
-
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, MVT::i32), // SWZ_X
+ DAG.getConstant(1, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, MVT::i32) // SWZ_W
+ };
+ return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
+ Args, 8);
}
- case AMDGPUIntrinsic::R600_store_stream_output : {
- MachineFunction &MF = DAG.getMachineFunction();
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
- SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
- unsigned Inst;
- switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) {
- // STREAM3
- case 3:
- Inst = 4;
- break;
- // STREAM2
- case 2:
- Inst = 3;
- break;
- // STREAM1
- case 1:
- Inst = 2;
- break;
- // STREAM0
- case 0:
- Inst = 1;
- break;
- default:
- llvm_unreachable("Wrong buffer id for stream outputs !");
- }
- return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
- RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
- Chain);
- }
// default for switch(IntrinsicID)
default: break;
}
@@ -442,38 +361,35 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
- case AMDGPUIntrinsic::R600_load_input_perspective: {
- int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (slot < 0)
- return DAG.getUNDEF(MVT::f32);
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP,
- DL, MVT::v4f32,
- DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
- }
- case AMDGPUIntrinsic::R600_load_input_linear: {
- int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (slot < 0)
- return DAG.getUNDEF(MVT::f32);
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP,
- DL, MVT::v4f32,
- DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
- }
- case AMDGPUIntrinsic::R600_load_input_constant: {
+
+ case AMDGPUIntrinsic::R600_interp_input: {
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (slot < 0)
- return DAG.getUNDEF(MVT::f32);
- SDValue FullVector = DAG.getNode(
- AMDGPUISD::INTERP_P0,
- DL, MVT::v4f32,
- DAG.getConstant(slot / 4 , MVT::i32));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+ int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ MachineSDNode *interp;
+ if (ijb < 0) {
+ interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
+ MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
+ return DAG.getTargetExtractSubreg(
+ TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
+ DL, MVT::f32, SDValue(interp, 0));
+ }
+
+ if (slot % 4 < 2)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
+ CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+ AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
+
+ return SDValue(interp, slot % 2);
}
case r600_read_ngroups_x:
@@ -527,6 +443,20 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
+ return;
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM does not seem to replace the Chain value inside the
+ // CustomWidenLowerNode function, so replace it manually here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE:
+ SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
+ Results.push_back(SDValue(Node, 0));
+ return;
}
}
@@ -594,6 +524,20 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
false, false, false, 0);
}
+SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
+}
+
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -680,9 +624,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
// Try to lower to a SET* instruction:
- // We need all the operands of SELECT_CC to have the same value type, so if
- // necessary we need to change True and False to be the same type as LHS and
- // RHS, and then convert the result of the select_cc back to the correct type.
+ //
+ // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
+ // but for the other case where CompareVT != VT, all operands of
+ // SELECT_CC need to have the same value type, so we need to change True and
+ // False to be the same type as LHS and RHS, and then convert the result of
+ // the select_cc back to the correct type.
// Move hardware True/False values to the correct operand.
if (isHWTrueValue(False) && isHWFalseValue(True)) {
@@ -692,32 +639,17 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
if (isHWTrueValue(True) && isHWFalseValue(False)) {
- if (CompareVT != VT) {
- if (VT == MVT::f32 && CompareVT == MVT::i32) {
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- // Convert integer values of true (-1) and false (0) to fp values of
- // true (1.0f) and false (0.0f).
- SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
- DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
- } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
- SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- // Convert fp values of true (1.0f) and false (0.0f) to integer values
- // of true (-1) and false (0).
- SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
- return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
- } else {
- // I don't think there will be any other type pairings.
- assert(!"Unhandled operand type parings in SELECT_CC");
- }
+ if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
+ SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ // Convert integer values of true (-1) and false (0) to fp values of
+ // true (1.0f) and false (0.0f).
+ SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
} else {
// This SELECT_CC is already legal.
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
@@ -808,6 +740,61 @@ SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return Cond;
}
+/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
+/// convert these pointers to a register index. Each register holds
+/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
+/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
+/// for indirect addressing.
+SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
+ unsigned StackWidth,
+ SelectionDAG &DAG) const {
+ unsigned SRLPad;
+ switch(StackWidth) {
+ case 1:
+ SRLPad = 2;
+ break;
+ case 2:
+ SRLPad = 3;
+ break;
+ case 4:
+ SRLPad = 4;
+ break;
+ default: llvm_unreachable("Invalid stack width");
+ }
+
+ return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ DAG.getConstant(SRLPad, MVT::i32));
+}
+
+void R600TargetLowering::getStackAddress(unsigned StackWidth,
+ unsigned ElemIdx,
+ unsigned &Channel,
+ unsigned &PtrIncr) const {
+ switch (StackWidth) {
+ default:
+ case 1:
+ Channel = 0;
+ if (ElemIdx > 0) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 2:
+ Channel = ElemIdx % 2;
+ if (ElemIdx == 2) {
+ PtrIncr = 1;
+ } else {
+ PtrIncr = 0;
+ }
+ break;
+ case 4:
+ Channel = ElemIdx;
+ PtrIncr = 0;
+ break;
+ }
+}
+
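
Two worked examples of the private-address computation above, assuming the
stated stack widths:

  // StackWidth == 1: byte address 12 -> 12 >> 2 = register index 3;
  //                  every element lands in channel 0.
  // StackWidth == 2: byte address 16 -> 16 >> 3 = register index 2; for a
  //                  two-element value, element 0 uses channel 0 and
  //                  element 1 uses channel 1 with no pointer increment.
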
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
@@ -829,9 +816,188 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
return Chain;
}
- return SDValue();
+
+ EVT ValueVT = Value.getValueType();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (ValueVT.isVector()) {
+ unsigned NumElemVT = ValueVT.getVectorNumElements();
+ EVT ElemVT = ValueVT.getVectorElementType();
+ SDValue Stores[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
+ Value, DAG.getConstant(i, MVT::i32));
+
+ Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Elem, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32));
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ } else {
+ if (ValueVT == MVT::i8) {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
+ }
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
+ }
+
+ return Chain;
+}
+
+// Returns 512 + (kc_bank << 12) for the given constant-buffer address space.
+static int
+ConstantAddressBlock(unsigned AddressSpace) {
+ switch (AddressSpace) {
+ case AMDGPUAS::CONSTANT_BUFFER_0:
+ return 512;
+ case AMDGPUAS::CONSTANT_BUFFER_1:
+ return 512 + 4096;
+ case AMDGPUAS::CONSTANT_BUFFER_2:
+ return 512 + 4096 * 2;
+ case AMDGPUAS::CONSTANT_BUFFER_3:
+ return 512 + 4096 * 3;
+ case AMDGPUAS::CONSTANT_BUFFER_4:
+ return 512 + 4096 * 4;
+ case AMDGPUAS::CONSTANT_BUFFER_5:
+ return 512 + 4096 * 5;
+ case AMDGPUAS::CONSTANT_BUFFER_6:
+ return 512 + 4096 * 6;
+ case AMDGPUAS::CONSTANT_BUFFER_7:
+ return 512 + 4096 * 7;
+ case AMDGPUAS::CONSTANT_BUFFER_8:
+ return 512 + 4096 * 8;
+ case AMDGPUAS::CONSTANT_BUFFER_9:
+ return 512 + 4096 * 9;
+ case AMDGPUAS::CONSTANT_BUFFER_10:
+ return 512 + 4096 * 10;
+ case AMDGPUAS::CONSTANT_BUFFER_11:
+ return 512 + 4096 * 11;
+ case AMDGPUAS::CONSTANT_BUFFER_12:
+ return 512 + 4096 * 12;
+ case AMDGPUAS::CONSTANT_BUFFER_13:
+ return 512 + 4096 * 13;
+ case AMDGPUAS::CONSTANT_BUFFER_14:
+ return 512 + 4096 * 14;
+ case AMDGPUAS::CONSTANT_BUFFER_15:
+ return 512 + 4096 * 15;
+ default:
+ return -1;
+ }
}
+SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ if (ConstantBlock > -1) {
+ SDValue Result;
+ if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
+ dyn_cast<Constant>(LoadNode->getSrcValue())) {
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want the constant position encoded with the following formula:
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // where const_index is the Ptr computed by LLVM using an alignment of 16.
+ // Thus we add ((512 + (kc_bank << 12)) * 4 + chan) * 4 here and then
+ // divide by 4 at the ISel step.
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
+ } else {
+ // A non-constant Ptr can't be folded; keep it as a v4i32 load.
+ Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+ );
+ }
+
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
+ if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ return SDValue();
+ }
+
+ // Lowering for indirect addressing
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
+ getTargetMachine().getFrameLowering());
+ unsigned StackWidth = TFL->getStackWidth(MF);
+
+ Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
+
+ if (VT.isVector()) {
+ unsigned NumElemVT = VT.getVectorNumElements();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Loads[4];
+
+ assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
+ "vector width in load");
+
+ for (unsigned i = 0; i < NumElemVT; ++i) {
+ unsigned Channel, PtrIncr;
+ getStackAddress(StackWidth, i, Channel, PtrIncr);
+ Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+ DAG.getConstant(PtrIncr, MVT::i32));
+ Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
+ Chain, Ptr,
+ DAG.getTargetConstant(Channel, MVT::i32),
+ Op.getOperand(2));
+ }
+ for (unsigned i = NumElemVT; i < 4; ++i) {
+ Loads[i] = DAG.getUNDEF(ElemVT);
+ }
+ EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ } else {
+ LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32), // Channel
+ Op.getOperand(2));
+ }
+
+ SDValue Ops[2];
+ Ops[0] = LoweredLoad;
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+}
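
To make the constant-buffer arithmetic above concrete: a load from
CONSTANT_BUFFER_0 (kc_bank 0) with a byte Ptr of 32 (const_index 2) yields,
for channel 1:

  // NewPtr = 32 + 4*1 + 512*16 = 8228
  // 8228 / 4 = 2057 = ((512 + 2) << 2) + 1

which matches (((512 + (kc_bank << 12) + const_index) << 2) + chan) once the
divide-by-4 happens at the ISel step.
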
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
SelectionDAG &DAG) const {
@@ -873,7 +1039,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
AMDGPUAS::PARAM_I_ADDRESS);
SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
DAG.getConstant(ParamOffsetBytes, MVT::i32),
- MachinePointerInfo(new Argument(PtrTy)),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
ArgVT, false, false, ArgBytes);
InVals.push_back(Arg);
ParamOffsetBytes += ArgBytes;
@@ -904,6 +1070,121 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+
+ // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
+ // (i32 select_cc f32, f32, -1, 0 cc)
+ //
+ // Mesa's GLSL frontend generates the above pattern a lot and we can lower
+ // this to one of the SET*_DX10 instructions.
+ case ISD::FP_TO_SINT: {
+ SDValue FNeg = N->getOperand(0);
+ if (FNeg.getOpcode() != ISD::FNEG) {
+ return SDValue();
+ }
+ SDValue SelectCC = FNeg.getOperand(0);
+ if (SelectCC.getOpcode() != ISD::SELECT_CC ||
+ SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
+ SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
+ !isHWTrueValue(SelectCC.getOperand(2)) ||
+ !isHWFalseValue(SelectCC.getOperand(3))) {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getConstant(-1, MVT::i32), // True
+ DAG.getConstant(0, MVT::i32), // False
+ SelectCC.getOperand(4)); // CC
+
+ break;
+ }
+ // An extract_vector_elt of a build_vector generated by custom lowering
+ // also needs to be combined here.
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Arg = N->getOperand(0);
+ if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return Arg->getOperand(Element);
+ }
+ }
+ if (Arg.getOpcode() == ISD::BITCAST &&
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned Element = Const->getZExtValue();
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
+ }
+ }
+ }
+
+ case ISD::SELECT_CC: {
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
+ // selectcc x, y, a, b, inv(cc)
+ SDValue LHS = N->getOperand(0);
+ if (LHS.getOpcode() != ISD::SELECT_CC) {
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+
+ if (LHS.getOperand(2).getNode() != True.getNode() ||
+ LHS.getOperand(3).getNode() != False.getNode() ||
+ RHS.getNode() != False.getNode() ||
+ cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+ return SDValue();
+ }
+
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
+ CCOpcode = ISD::getSetCCInverse(
+ CCOpcode, LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(N->getDebugLoc(),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ CCOpcode);
+ }
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
+ SDValue NewBldVec[4] = {
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32)
+ };
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
+ if (C->isZero()) {
+ NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+ } else if (C->isExactlyValue(1.0)) {
+ NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ }
+ DebugLoc DL = N->getDebugLoc();
+ NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ }
}
return SDValue();
}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 2b954da..afa3897 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -63,7 +63,13 @@ private:
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
+ SelectionDAG &DAG) const;
+ void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
+ unsigned &Channel, unsigned &PtrIncr) const;
bool isZero(SDValue Op) const;
};
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 06b78d0..7e3f005 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -16,8 +16,11 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"
@@ -104,7 +107,6 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
switch (Opcode) {
default: return false;
case AMDGPU::RETURN:
- case AMDGPU::RESERVE_REG:
return true;
}
}
@@ -466,6 +468,124 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
+int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = 0;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ Offset = std::max(Offset,
+ GET_REG_INDEX(RI.getEncodingValue(LI->first)));
+ }
+
+ return Offset + 1;
+}
+
+int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
+std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+ const MachineFunction &MF) const {
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ std::vector<unsigned> Regs;
+
+ unsigned StackWidth = TFL->getStackWidth(MF);
+ int End = getIndirectIndexEnd(MF);
+
+ if (End == -1) {
+ return Regs;
+ }
+
+ for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
+ unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
+ Regs.push_back(SuperReg);
+ for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
+ unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ Regs.push_back(Reg);
+ }
+ }
+ return Regs;
+}
+
+unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ // XXX: Remove when we support a stack width > 2
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ return &AMDGPU::R600_TReg32RegClass;
+}
+
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
+ return &AMDGPU::TRegMemRegClass;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X, OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ AddrReg, ValueReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::DST_REL, 1);
+ return Mov;
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const {
+ unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
+ AMDGPU::AR_X,
+ OffsetReg);
+ setImmOperand(MOVA, R600Operands::WRITE, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ ValueReg,
+ AddrReg)
+ .addReg(AMDGPU::AR_X, RegState::Implicit);
+ setImmOperand(Mov, R600Operands::SRC0_REL, 1);
+
+ return Mov;
+}
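
The indirect write above boils down to a two-instruction sequence, roughly
(AddrN being the R600_AddrRegClass register selected by Address):

  // MOVA_INT_eg AR.X, <offset reg>    ; WRITE flag cleared
  // MOV         AddrN, <value reg>    ; DST_REL = 1, implicit use of AR.X
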
+
+const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
+ return &AMDGPU::IndirectRegRegClass;
+}
+
+
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Opcode,
@@ -486,13 +606,15 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
.addReg(Src0Reg) // $src0
.addImm(0) // $src0_neg
.addImm(0) // $src0_rel
- .addImm(0); // $src0_abs
+ .addImm(0) // $src0_abs
+ .addImm(-1); // $src0_sel
if (Src1Reg) {
MIB.addReg(Src1Reg) // $src1
.addImm(0) // $src1_neg
.addImm(0) // $src1_rel
- .addImm(0); // $src1_abs
+ .addImm(0) // $src1_abs
+ .addImm(-1); // $src1_sel
}
//XXX: The r600g finalizer expects this to be 1, once we've moved the
@@ -521,16 +643,6 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
- const static int OpTable[3][R600Operands::COUNT] = {
-// W C S S S S S S S S
-// R O D L S R R R S R R R S R R L P
-// D U I M R A R C C C C C C C R C C A R I
-// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
-// T M P E D L P 0 N R A 1 N R A 2 N R T D M
- {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
- {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
- {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
- };
unsigned TargetFlags = get(Opcode).TSFlags;
unsigned OpTableIdx;
@@ -556,7 +668,7 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
OpTableIdx = 2;
}
- return OpTable[OpTableIdx][Op];
+ return R600Operands::ALUOpTable[OpTableIdx][Op];
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index 11685af..efe721c 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -113,6 +113,38 @@ namespace llvm {
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
+ /// \returns a list of all the registers that may be accessed using indirect
+ /// addressing.
+ std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
+
+
+ /// buildDefaultInstruction - This function returns a MachineInstr with
+ /// all the instruction modifiers initialized to their default values.
/// You can use this function to avoid manually specifying each instruction
/// modifier operand when building a new instruction.
///
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 64bab18..8242df9 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -70,6 +70,11 @@ class InstFlag<string PM = "printOperand", int Default = 0>
let PrintMethod = PM;
}
+// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
+def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
+ let PrintMethod = "printSel";
+}
+
def LITERAL : InstFlag<"printLiteral">;
def WRITE : InstFlag <"printWrite", 1>;
@@ -86,9 +91,16 @@ def UP : InstFlag <"printUpdatePred">;
// default to 0.
def LAST : InstFlag<"printLast", 1>;
+def FRAMEri : Operand<iPTR> {
+ let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
+}
+
def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
+def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
+def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
class R600ALU_Word0 {
field bits<32> Word0;
@@ -173,6 +185,55 @@ class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{
let Word1{17-13} = alu_inst;
}
+class VTX_WORD0 {
+ field bits<32> Word0;
+ bits<7> SRC_GPR;
+ bits<5> VC_INST;
+ bits<2> FETCH_TYPE;
+ bits<1> FETCH_WHOLE_QUAD;
+ bits<8> BUFFER_ID;
+ bits<1> SRC_REL;
+ bits<2> SRC_SEL_X;
+ bits<6> MEGA_FETCH_COUNT;
+
+ let Word0{4-0} = VC_INST;
+ let Word0{6-5} = FETCH_TYPE;
+ let Word0{7} = FETCH_WHOLE_QUAD;
+ let Word0{15-8} = BUFFER_ID;
+ let Word0{22-16} = SRC_GPR;
+ let Word0{23} = SRC_REL;
+ let Word0{25-24} = SRC_SEL_X;
+ let Word0{31-26} = MEGA_FETCH_COUNT;
+}
+
+class VTX_WORD1_GPR {
+ field bits<32> Word1;
+ bits<7> DST_GPR;
+ bits<1> DST_REL;
+ bits<3> DST_SEL_X;
+ bits<3> DST_SEL_Y;
+ bits<3> DST_SEL_Z;
+ bits<3> DST_SEL_W;
+ bits<1> USE_CONST_FIELDS;
+ bits<6> DATA_FORMAT;
+ bits<2> NUM_FORMAT_ALL;
+ bits<1> FORMAT_COMP_ALL;
+ bits<1> SRF_MODE_ALL;
+
+ let Word1{6-0} = DST_GPR;
+ let Word1{7} = DST_REL;
+ let Word1{8} = 0; // Reserved
+ let Word1{11-9} = DST_SEL_X;
+ let Word1{14-12} = DST_SEL_Y;
+ let Word1{17-15} = DST_SEL_Z;
+ let Word1{20-18} = DST_SEL_W;
+ let Word1{21} = USE_CONST_FIELDS;
+ let Word1{27-22} = DATA_FORMAT;
+ let Word1{29-28} = NUM_FORMAT_ALL;
+ let Word1{30} = FORMAT_COMP_ALL;
+ let Word1{31} = SRF_MODE_ALL;
+}
+
/*
XXX: R600 subtarget uses a slightly different encoding than the other
subtargets. We currently handle this in R600MCCodeEmitter, but we may
@@ -214,11 +275,11 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -254,13 +315,13 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName,
"$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
- "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
+ "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
+ "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -291,14 +352,14 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
InstR600 <0,
(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
- R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
- R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
- R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
+ R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
!strconcat(opName, "$clamp $dst$dst_rel, "
- "$src0_neg$src0$src0_rel, "
- "$src1_neg$src1$src1_rel, "
- "$src2_neg$src2$src2_rel, "
+ "$src0_neg$src0$src0_sel$src0_rel, "
+ "$src1_neg$src1$src1_sel$src1_rel, "
+ "$src2_neg$src2$src2_sel$src2_rel, "
"$literal $pred_sel$last"),
pattern,
itin>,
@@ -342,6 +403,27 @@ def TEX_SHADOW : PatLeaf<
}]
>;
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
+
class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs,
dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern> {
@@ -414,32 +496,35 @@ def isR600toCayman : Predicate<
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
//===----------------------------------------------------------------------===//
-// Interpolation Instructions
+// R600 SDNodes
//===----------------------------------------------------------------------===//
-def INTERP: SDNode<"AMDGPUISD::INTERP",
- SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
- >;
+def INTERP_PAIR_XY : AMDGPUShaderInst <
+ (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 dst1",
+ []>;
-def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
- SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
- >;
+def INTERP_PAIR_ZW : AMDGPUShaderInst <
+ (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
+ (ins i32imm:$src0, R600_Reg32:$src1, R600_Reg32:$src2),
+ "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
+ []>;
-let usesCustomInserter = 1 in {
-def input_perspective : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src0, i32imm:$src1),
- "input_perspective $src0 $src1 : dst",
- [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
-} // End usesCustomInserter = 1
-
-def input_constant : AMDGPUShaderInst <
- (outs R600_Reg128:$dst),
- (ins i32imm:$src),
- "input_perspective $src : dst",
- [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
+def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
+ SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
+ [SDNPMayLoad]
+>;
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+def INTERP_VEC_LOAD : AMDGPUShaderInst <
+ (outs R600_Reg128:$dst),
+ (ins i32imm:$src0),
+ "INTERP_LOAD $src0 : $dst",
+ []>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
@@ -455,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
// Export Instructions
//===----------------------------------------------------------------------===//
-def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>;
+def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
[SDNPHasChain, SDNPSideEffect]>;
@@ -507,53 +592,59 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
- def : Pat<(int_R600_store_pixel_dummy),
+ def : Pat<(int_R600_store_dummy (i32 imm:$type)),
(ExportInst
- (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0)
+ (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0)
>;
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
- 0, 1, 2, 3, cf_inst, 0)
+ def : Pat<(int_R600_store_dummy 1),
+ (ExportInst
+ (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0)
+ >;
+
+ def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
+ (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
+ imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
>;
+
}
multiclass SteamOutputExportPattern<Instruction ExportInst,
bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf0inst, 0)>;
// Stream1
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4),
- (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)),
- (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase,
+ def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
+ (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
+ (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;
}
-let isTerminator = 1, usesCustomInserter = 1 in {
+let usesCustomInserter = 1 in {
class ExportSwzInst : InstR600ISA<(
outs),
@@ -567,7 +658,7 @@ class ExportSwzInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
-} // End isTerminator = 1, usesCustomInserter = 1
+} // End usesCustomInserter = 1
class ExportBufInst : InstR600ISA<(
outs),
@@ -580,7 +671,7 @@ class ExportBufInst : InstR600ISA<(
let Inst{63-32} = Word1;
}
-let Predicates = [isR600toCayman] in {
+let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
// Common Instructions R600, R700, Evergreen, Cayman
@@ -624,6 +715,34 @@ def SNE : R600_2OP <
COND_NE))]
>;
+def SETE_DX10 : R600_2OP <
+ 0xC, "SETE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_EQ))]
+>;
+
+def SETGT_DX10 : R600_2OP <
+ 0xD, "SETGT_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GT))]
+>;
+
+def SETGE_DX10 : R600_2OP <
+ 0xE, "SETGE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_GE))]
+>;
+
+def SETNE_DX10 : R600_2OP <
+ 0xF, "SETNE_DX10",
+ [(set R600_Reg32:$dst,
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
+ COND_NE))]
+>;
+
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
@@ -684,7 +803,7 @@ def SETE_INT : R600_2OP <
>;
def SETGT_INT : R600_2OP <
- 0x3B, "SGT_INT",
+ 0x3B, "SETGT_INT",
[(set (i32 R600_Reg32:$dst),
(selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
>;
@@ -830,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP <
class MULADD_Common <bits<5> inst> : R600_3OP <
inst, "MULADD",
+ []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+ inst, "MULADD_IEEE",
[(set (f32 R600_Reg32:$dst),
- (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+ (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
@@ -988,6 +1112,7 @@ let Predicates = [isR600] in {
def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
def MULADD_r600 : MULADD_Common<0x10>;
+ def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
def CNDE_r600 : CNDE_Common<0x18>;
def CNDGT_r600 : CNDGT_Common<0x19>;
def CNDGE_r600 : CNDGE_Common<0x1A>;
@@ -1070,7 +1195,7 @@ let Predicates = [isR700] in {
//===----------------------------------------------------------------------===//
let Predicates = [isEG] in {
-
+
def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>;
defm DIV_eg : DIV_Common<RECIP_IEEE_eg>;
@@ -1127,6 +1252,7 @@ let Predicates = [isEGorCayman] in {
>;
def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
def ASHR_eg : ASHR_Common<0x15>;
def LSHR_eg : LSHR_Common<0x16>;
def LSHL_eg : LSHL_Common<0x17>;
@@ -1138,6 +1264,10 @@ let Predicates = [isEGorCayman] in {
defm DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+let hasSideEffects = 1 in {
+ def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>;
+}
+
def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>;
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
@@ -1228,37 +1358,30 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
>;
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> {
-
- // Operands
- bits<7> DST_GPR;
- bits<7> SRC_GPR;
+ : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern>,
+ VTX_WORD1_GPR, VTX_WORD0 {
// Static fields
- bits<5> VC_INST = 0;
- bits<2> FETCH_TYPE = 2;
- bits<1> FETCH_WHOLE_QUAD = 0;
- bits<8> BUFFER_ID = buffer_id;
- bits<1> SRC_REL = 0;
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = buffer_id;
+ let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
- bits<2> SRC_SEL_X = 0;
- bits<6> MEGA_FETCH_COUNT;
- bits<1> DST_REL = 0;
- bits<3> DST_SEL_X;
- bits<3> DST_SEL_Y;
- bits<3> DST_SEL_Z;
- bits<3> DST_SEL_W;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
// The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
// FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
// however, based on my testing if USE_CONST_FIELDS is set, then all
// these fields need to be set to 0.
- bits<1> USE_CONST_FIELDS = 0;
- bits<6> DATA_FORMAT;
- bits<2> NUM_FORMAT_ALL = 1;
- bits<1> FORMAT_COMP_ALL = 0;
- bits<1> SRF_MODE_ALL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 1;
+ let FORMAT_COMP_ALL = 0;
+ let SRF_MODE_ALL = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
// LLVM can only encode 64-bit instructions, so these fields are manually
// encoded in R600CodeEmitter
//
@@ -1269,29 +1392,7 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
// bits<1> ALT_CONST = 0;
// bits<2> BUFFER_INDEX_MODE = 0;
- // VTX_WORD0
- let Inst{4-0} = VC_INST;
- let Inst{6-5} = FETCH_TYPE;
- let Inst{7} = FETCH_WHOLE_QUAD;
- let Inst{15-8} = BUFFER_ID;
- let Inst{22-16} = SRC_GPR;
- let Inst{23} = SRC_REL;
- let Inst{25-24} = SRC_SEL_X;
- let Inst{31-26} = MEGA_FETCH_COUNT;
-
- // VTX_WORD1_GPR
- let Inst{38-32} = DST_GPR;
- let Inst{39} = DST_REL;
- let Inst{40} = 0; // Reserved
- let Inst{43-41} = DST_SEL_X;
- let Inst{46-44} = DST_SEL_Y;
- let Inst{49-47} = DST_SEL_Z;
- let Inst{52-50} = DST_SEL_W;
- let Inst{53} = USE_CONST_FIELDS;
- let Inst{59-54} = DATA_FORMAT;
- let Inst{61-60} = NUM_FORMAT_ALL;
- let Inst{62} = FORMAT_COMP_ALL;
- let Inst{63} = SRF_MODE_ALL;
+
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
// is done in R600CodeEmitter
@@ -1346,7 +1447,7 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
// This is not really necessary, but there were some GPU hangs that appeared
// to be caused by ALU instructions in the next instruction group that wrote
- // to the $ptr registers of the VTX_READ.
+ // to the $ptr registers of the VTX_READ.
// e.g.
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
// %T2_X<def> = MOV %ZERO
@@ -1387,6 +1488,10 @@ def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
[(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
>;
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+>;
+
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
@@ -1417,9 +1522,15 @@ def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
}
+//===----------------------------------------------------------------------===//
+// Register loads and stores - for indirect addressing
+//===----------------------------------------------------------------------===//
+
+defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+
let Predicates = [isCayman] in {
-let isVector = 1 in {
+let isVector = 1 in {
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
@@ -1476,6 +1587,7 @@ def PRED_X : InstR600 <
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
+ let isTerminator = 1;
}
let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
@@ -1502,19 +1614,6 @@ def MASK_WRITE : AMDGPUShaderInst <
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-def R600_LOAD_CONST : AMDGPUShaderInst <
- (outs R600_Reg32:$dst),
- (ins i32imm:$src0),
- "R600_LOAD_CONST $dst, $src0",
- [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
->;
-
-def RESERVE_REG : AMDGPUShaderInst <
- (outs),
- (ins i32imm:$src),
- "RESERVE_REG $src",
- [(int_AMDGPU_reserve_reg imm:$src)]
->;
def TXD: AMDGPUShaderInst <
(outs R600_Reg128:$dst),
@@ -1540,11 +1639,138 @@ def FNEG_R600 : FNEG<R600_Reg32>;
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;
}
+
+//===----------------------------------------------------------------------===//
+// Constant Buffer Addressing Support
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+def CONST_COPY : Instruction {
+ let OutOperandList = (outs R600_Reg32:$dst);
+ let InOperandList = (ins i32imm:$src);
+ let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
+ let AsmString = "CONST_COPY";
+ let neverHasSideEffects = 1;
+ let isAsCheapAsAMove = 1;
+ let Itinerary = NullALU;
+}
+} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
+
+def TEX_VTX_CONSTBUF :
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
+ [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
+ VTX_WORD1_GPR, VTX_WORD0 {
+
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let BUFFER_ID = 0;
+ let SRC_REL = 0;
+ let SRC_SEL_X = 0;
+ let DST_REL = 0;
+ let USE_CONST_FIELDS = 0;
+ let NUM_FORMAT_ALL = 2;
+ let FORMAT_COMP_ALL = 1;
+ let SRF_MODE_ALL = 1;
+ let MEGA_FETCH_COUNT = 16;
+ let DST_SEL_X = 0;
+ let DST_SEL_Y = 1;
+ let DST_SEL_Z = 2;
+ let DST_SEL_W = 3;
+ let DATA_FORMAT = 35;
+
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+def TEX_VTX_TEXBUF:
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
+ [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+VTX_WORD1_GPR, VTX_WORD0 {
+
+let VC_INST = 0;
+let FETCH_TYPE = 2;
+let FETCH_WHOLE_QUAD = 0;
+let SRC_REL = 0;
+let SRC_SEL_X = 0;
+let DST_REL = 0;
+let USE_CONST_FIELDS = 1;
+let NUM_FORMAT_ALL = 0;
+let FORMAT_COMP_ALL = 0;
+let SRF_MODE_ALL = 1;
+let MEGA_FETCH_COUNT = 16;
+let DST_SEL_X = 0;
+let DST_SEL_Y = 1;
+let DST_SEL_Z = 2;
+let DST_SEL_W = 3;
+let DATA_FORMAT = 0;
+
+let Inst{31-0} = Word0;
+let Inst{63-32} = Word1;
+
+// LLVM can only encode 64-bit instructions, so these fields are manually
+// encoded in R600CodeEmitter
+//
+// bits<16> OFFSET;
+// bits<2> ENDIAN_SWAP = 0;
+// bits<1> CONST_BUF_NO_STRIDE = 0;
+// bits<1> MEGA_FETCH = 0;
+// bits<1> ALT_CONST = 0;
+// bits<2> BUFFER_INDEX_MODE = 0;
+
+
+
+// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
+// is done in R600CodeEmitter
+//
+// Inst{79-64} = OFFSET;
+// Inst{81-80} = ENDIAN_SWAP;
+// Inst{82} = CONST_BUF_NO_STRIDE;
+// Inst{83} = MEGA_FETCH;
+// Inst{84} = ALT_CONST;
+// Inst{86-85} = BUFFER_INDEX_MODE;
+// Inst{95-86} = 0; Reserved
+
+// VTX_WORD3 (Padding)
+//
+// Inst{127-96} = 0;
+}
+
+
+
//===--------------------------------------------------------------------===//
// Instructions support
//===--------------------------------------------------------------------===//
@@ -1641,7 +1867,19 @@ def : Pat <
// SGE Reverse args
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+ (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGT_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+>;
+
+// SETGE_DX10 reverse args
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
>;
// SETGT_INT reverse args
@@ -1682,31 +1920,43 @@ def : Pat <
(SETE R600_Reg32:$src0, R600_Reg32:$src1)
>;
+//SETE_DX10 - 'true if ordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
+ (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
//SNE - 'true if unordered'
def : Pat <
(selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
(SNE R600_Reg32:$src0, R600_Reg32:$src1)
>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
+//SETNE_DX10 - 'true if unordered'
+def : Pat <
+ (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+>;
+
+def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
+def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
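Side note on the SET*_DX10 definitions added above: they select integer masks (-1 or 0) rather than the 1.0f/0.0f results produced by SGE/SETE. A minimal host-side sketch of that semantics, purely illustrative (the function name is an assumption, not part of the patch):

#include <cstdint>

// DX10-style ordered greater-than: all-ones mask when true, zero when false,
// mirroring the (i32 -1)/(i32 0) select values in the SETGT_DX10 pattern.
static int32_t setgt_dx10(float a, float b) {
  return (a > b) ? -1 : 0;
}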
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
index 3825bc4..dc8980a 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -12,21 +12,20 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "R600", isTarget = 1 in {
- def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_R600_load_input_perspective :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_constant :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
- def int_R600_load_input_linear :
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+ def int_R600_load_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_input :
+ Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_load_texbuf :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_store_swizzle :
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_stream_output :
- Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>;
- def int_R600_store_pixel_color :
- Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_pixel_depth :
Intrinsic<[], [llvm_float_ty], []>;
def int_R600_store_pixel_stencil :
Intrinsic<[], [llvm_float_ty], []>;
- def int_R600_store_pixel_dummy :
- Intrinsic<[], [], []>;
+ def int_R600_store_dummy :
+ Intrinsic<[], [llvm_i32_ty], []>;
}
diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp
new file mode 100644
index 0000000..3ebe653
--- /dev/null
+++ b/lib/Target/R600/R600LowerConstCopy.cpp
@@ -0,0 +1,222 @@
+//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass handles the remaining ConstCopy pseudo MachineInstrs. ISel folds
+/// each constant buffer read into a scalar ALU instruction, but it cannot fold
+/// them into vector instructions like DOT4 or Cube; it emits ConstCopy instead.
+/// This pass (executed after ExpandingSpecialInstr) tries to fold them where
+/// possible and replaces them with a MOV otherwise.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/GlobalValue.h"
+
+namespace llvm {
+
+class R600LowerConstCopy : public MachineFunctionPass {
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ struct ConstPairs {
+ unsigned XYPair;
+ unsigned ZWPair;
+ };
+
+ bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const;
+public:
+ R600LowerConstCopy(TargetMachine &tm);
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
+};
+
+char R600LowerConstCopy::ID = 0;
+
+R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo()))
+{
+}
+
+bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst,
+ unsigned ReadConst) const {
+ unsigned ReadConstChan = ReadConst & 3;
+ unsigned ReadConstIndex = ReadConst & (~3);
+ if (ReadConstChan < 2) {
+ if (!UsedConst.XYPair) {
+ UsedConst.XYPair = ReadConstIndex;
+ }
+ return UsedConst.XYPair == ReadConstIndex;
+ } else {
+ if (!UsedConst.ZWPair) {
+ UsedConst.ZWPair = ReadConstIndex;
+ }
+ return UsedConst.ZWPair == ReadConstIndex;
+ }
+}
+
+static bool isControlFlow(const MachineInstr &MI) {
+ return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) ||
+ (MI.getOpcode() == AMDGPU::ENDIF) ||
+ (MI.getOpcode() == AMDGPU::ELSE) ||
+ (MI.getOpcode() == AMDGPU::WHILELOOP) ||
+ (MI.getOpcode() == AMDGPU::BREAK);
+}
+
+bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ DenseMap<unsigned, MachineInstr *> RegToConstIndex;
+ for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+ E = MBB.instr_end(); I != E;) {
+
+ if (I->getOpcode() == AMDGPU::CONST_COPY) {
+ MachineInstr &MI = *I;
+ I = llvm::next(I);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+ RegToConstIndex.find(DstReg);
+ if (SrcMI != RegToConstIndex.end()) {
+ SrcMI->second->eraseFromParent();
+ RegToConstIndex.erase(SrcMI);
+ }
+ MachineInstr *NewMI =
+ TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV,
+ MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI.getOperand(1).getImm());
+ RegToConstIndex[DstReg] = NewMI;
+ MI.eraseFromParent();
+ continue;
+ }
+
+ std::vector<unsigned> Defs;
+ // We treat all instructions as bundled because the algorithm that handles
+ // const read port limitations inside an IG is still valid for single
+ // instructions.
+ std::vector<MachineInstr *> Bundle;
+
+ if (I->isBundle()) {
+ unsigned BundleSize = I->getBundleSize();
+ for (unsigned i = 0; i < BundleSize; i++) {
+ I = llvm::next(I);
+ Bundle.push_back(I);
+ }
+ } else if (TII->isALUInstr(I->getOpcode())){
+ Bundle.push_back(I);
+ } else if (isControlFlow(*I)) {
+ RegToConstIndex.clear();
+ I = llvm::next(I);
+ continue;
+ } else {
+ MachineInstr &MI = *I;
+ for (MachineInstr::mop_iterator MOp = MI.operands_begin(),
+ MOpE = MI.operands_end(); MOp != MOpE; ++MOp) {
+ MachineOperand &MO = *MOp;
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(MO.getReg());
+ } else {
+ // Either a TEX or an Export inst; prevent erasing the def of a used
+ // operand
+ RegToConstIndex.erase(MO.getReg());
+ for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo());
+ SR.isValid(); ++SR) {
+ RegToConstIndex.erase(*SR);
+ }
+ }
+ }
+ }
+
+
+ R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
+ ItE = Bundle.end(); It != ItE; ++It) {
+ MachineInstr *MI = *It;
+ if (TII->isPredicated(MI)) {
+ // We don't want to erase previous assignment
+ RegToConstIndex.erase(MI->getOperand(0).getReg());
+ } else {
+ int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE);
+ if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm())
+ Defs.push_back(MI->getOperand(0).getReg());
+ }
+ }
+
+ ConstPairs CP = {0,0};
+ for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) {
+ for(std::vector<MachineInstr *>::iterator It = Bundle.begin(),
+ ItE = Bundle.end(); It != ItE; ++It) {
+ MachineInstr *MI = *It;
+ int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]);
+ if (SrcIdx < 0)
+ continue;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+ RegToConstIndex.find(MO.getReg());
+ if (SrcMI != RegToConstIndex.end()) {
+ MachineInstr *CstMov = SrcMI->second;
+ int ConstMovSel =
+ TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL);
+ unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm();
+ if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) {
+ TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex);
+ MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST);
+ } else {
+ RegToConstIndex.erase(SrcMI);
+ }
+ }
+ }
+ }
+
+ for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end();
+ It != ItE; ++It) {
+ DenseMap<unsigned, MachineInstr *>::iterator SrcMI =
+ RegToConstIndex.find(*It);
+ if (SrcMI != RegToConstIndex.end()) {
+ SrcMI->second->eraseFromParent();
+ RegToConstIndex.erase(SrcMI);
+ }
+ }
+ I = llvm::next(I);
+ }
+
+ if (MBB.succ_empty()) {
+ for (DenseMap<unsigned, MachineInstr *>::iterator
+ DI = RegToConstIndex.begin(), DE = RegToConstIndex.end();
+ DI != DE; ++DI) {
+ DI->second->eraseFromParent();
+ }
+ }
+ }
+ return false;
+}
+
+FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
+ return new R600LowerConstCopy(tm);
+}
+
+}
+
+
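To make the pairing rule enforced by canFoldInBundle above easier to follow, here is a standalone sketch with a tiny usage example; the helper name and the sample SRC_SEL values are assumptions for illustration only:

#include <cassert>

struct ConstPairs { unsigned XYPair = 0, ZWPair = 0; };

// Within one instruction group, const reads routed through channels X/Y must
// share one constant pair and reads through Z/W another; Sel packs the pair
// index in the upper bits and the channel in the low two bits.
static bool canFold(ConstPairs &P, unsigned Sel) {
  unsigned Chan = Sel & 3, Index = Sel & ~3u;
  unsigned &Slot = (Chan < 2) ? P.XYPair : P.ZWPair;
  if (!Slot)
    Slot = Index;
  return Slot == Index;
}

int main() {
  ConstPairs P;
  assert(canFold(P, 8));    // pair 8, channel X: claims the XY slot
  assert(canFold(P, 9));    // pair 8, channel Y: shares the same slot
  assert(!canFold(P, 12));  // pair 12, channel X: conflicts with the XY slot
}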
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 4eb5efa..40aec83 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -13,22 +13,6 @@
using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : MachineFunctionInfo(),
- HasLinearInterpolation(false),
- HasPerspectiveInterpolation(false) {
+ : MachineFunctionInfo() {
memset(Outputs, 0, sizeof(Outputs));
- memset(StreamOutputs, 0, sizeof(StreamOutputs));
}
-
-unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const {
- assert(HasPerspectiveInterpolation);
- return 0;
-}
-
-unsigned R600MachineFunctionInfo::GetIJLinearIndex() const {
- assert(HasLinearInterpolation);
- if (HasPerspectiveInterpolation)
- return 1;
- else
- return 0;
-}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index e97fb5b..4b901f4 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -13,6 +13,7 @@
#ifndef R600MACHINEFUNCTIONINFO_H
#define R600MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include <vector>
@@ -23,15 +24,9 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
public:
R600MachineFunctionInfo(const MachineFunction &MF);
- std::vector<unsigned> ReservedRegs;
+ SmallVector<unsigned, 4> LiveOuts;
+ std::vector<unsigned> IndirectRegs;
SDNode *Outputs[16];
- SDNode *StreamOutputs[64][4];
- bool HasLinearInterpolation;
- bool HasPerspectiveInterpolation;
-
- unsigned GetIJLinearIndex() const;
- unsigned GetIJPerspectiveIndex() const;
-
};
} // End llvm namespace
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index a39f83d..bbd7995 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "R600RegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
+#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
using namespace llvm;
@@ -28,7 +29,6 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
@@ -38,21 +38,30 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::NEG_ONE);
Reserved.set(AMDGPU::PV_X);
Reserved.set(AMDGPU::ALU_LITERAL_X);
+ Reserved.set(AMDGPU::ALU_CONST);
Reserved.set(AMDGPU::PREDICATE_BIT);
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
- for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
- E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
+ for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
+ E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
- for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
- E = MFI->ReservedRegs.end(); I != E; ++I) {
+ for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
+ E = AMDGPU::TRegMemRegClass.end();
+ I != E; ++I) {
Reserved.set(*I);
}
+ const R600InstrInfo *RII = static_cast<const R600InstrInfo*>(&TII);
+ std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
+ for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
+ E = IndirectRegs.end();
+ I != E; ++I) {
+ Reserved.set(*I);
+ }
return Reserved;
}
@@ -81,9 +90,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
switch (Channel) {
default: assert(!"Invalid channel index"); return 0;
- case 0: return AMDGPU::sel_x;
- case 1: return AMDGPU::sel_y;
- case 2: return AMDGPU::sel_z;
- case 3: return AMDGPU::sel_w;
+ case 0: return AMDGPU::sub0;
+ case 1: return AMDGPU::sub1;
+ case 2: return AMDGPU::sub2;
+ case 3: return AMDGPU::sub3;
}
}
+
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index d3d6d25..ce5994c 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -19,7 +19,7 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
- let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
+ let SubRegIndices = [sub0, sub1, sub2, sub3];
let HWEncoding = encoding;
}
@@ -28,9 +28,11 @@ foreach Index = 0-127 in {
// 32-bit Temporary Registers
def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
- // 32-bit Constant Registers (There are more than 128, this the number
- // that is currently supported.
- def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
+ // Indirect addressing offset registers
+ def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
+ Index, Chan>;
+ def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
+ Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
@@ -42,7 +44,7 @@ foreach Index = 0-127 in {
}
// Array Base Register holding input in FS
-foreach Index = 448-464 in {
+foreach Index = 448-480 in {
def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>;
}
@@ -61,19 +63,25 @@ def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
+def AR_X : R600Reg<"AR.x", 0>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "ArrayBase%u", 448, 464))>;
+ (add (sequence "ArrayBase%u", 448, 480))>;
+// special registers for ALU src operands
+// const buffer reference, SRCx_SEL contains index
+def ALU_CONST : R600Reg<"CBuf", 0>;
+// interpolation param reference, SRCx_SEL contains index
+def ALU_PARAM : R600Reg<"Param", 0>;
-def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (interleave
- (interleave (sequence "C%u_X", 0, 127),
- (sequence "C%u_Z", 0, 127)),
- (interleave (sequence "C%u_Y", 0, 127),
- (sequence "C%u_W", 0, 127))))>;
+let isAllocatable = 0 in {
+
+// XXX: Only use the X channel, until we support wider stack widths
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+
+} // End isAllocatable = 0
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (sequence "T%u_X", 0, 127))>;
+ (add (sequence "T%u_X", 0, 127), AR_X)>;
def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_Y", 0, 127))>;
@@ -85,15 +93,16 @@ def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_W", 0, 127))>;
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
- (add (interleave
- (interleave R600_TReg32_X, R600_TReg32_Z),
- (interleave R600_TReg32_Y, R600_TReg32_W)))>;
+ (interleave R600_TReg32_X, R600_TReg32_Y,
+ R600_TReg32_Z, R600_TReg32_W)>;
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
- R600_CReg32,
R600_ArrayBase,
- ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
+ R600_Addr,
+ ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
+ ALU_CONST, ALU_PARAM
+ )>;
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
@@ -105,3 +114,33 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
(add (sequence "T%u_XYZW", 0, 127))> {
let CopyCost = -1;
}
+
+//===----------------------------------------------------------------------===//
+// Register classes for indirect addressing
+//===----------------------------------------------------------------------===//
+
+// Super register for all the Indirect Registers. This register class is used
+// by the REG_SEQUENCE instruction to specify the registers to use for direct
+// reads / writes which may be written / read by an indirect address.
+class IndirectSuper<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ let Namespace = "AMDGPU";
+ let SubRegIndices =
+ [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
+}
+
+def IndirectSuperReg : IndirectSuper<"Indirect",
+ [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
+ TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
+ TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
+>;
+
+def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
+
+// This register class defines the registers that are the storage units for
+// the "Indirect Addressing" pseudo memory space.
+// XXX: Only use the X channel, until we support wider stack widths
+def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
+ (add (sequence "TRegMem%u_X", 0, 16))
+>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index f580377..2477e2a 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -147,7 +147,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
/// \brief Is BB the last block saved on the stack ?
bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
- return Stack.back().first == BB;
+ return !Stack.empty() && Stack.back().first == BB;
}
/// \brief Pop the last saved value from the control flow stack
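The isTopOfStack change above matters because calling back() on an empty stack is undefined behavior, so the added empty check must short-circuit first. A minimal illustration with simplified types (the container and names here are stand-ins, not the pass's actual members):

#include <vector>

static bool isTop(const std::vector<const void *> &Stack, const void *BB) {
  // The empty check must come first: Stack.back() on an empty vector is UB.
  return !Stack.empty() && Stack.back() == BB;
}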
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 4c672ca..0a0fbd9 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -26,21 +26,22 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
- TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) {
+ TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
+ TRI(TM.getRegisterInfo()) {
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
- addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
- addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
+ addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
+ addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
computeRegisterProperties();
- setOperationAction(ISD::AND, MVT::i1, Custom);
-
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
@@ -62,63 +63,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
- const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
MachineBasicBlock::iterator I = MI;
- if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
- AppendS_WAITCNT(MI, *BB, llvm::next(I));
- return BB;
- }
-
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
- case AMDGPU::CLAMP_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(0) // ABS
- .addImm(1) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
- MI->eraseFromParent();
- break;
-
- case AMDGPU::FABS_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(1) // ABS
- .addImm(0) // CLAMP
- .addImm(0) // OMOD
- .addImm(0); // NEG
- MI->eraseFromParent();
- break;
-
- case AMDGPU::FNEG_SI:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- // VSRC1-2 are unused, but we still need to fill all the
- // operand slots, so we just reuse the VSRC0 operand
- .addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(1))
- .addImm(0) // ABS
- .addImm(0) // CLAMP
- .addImm(0) // OMOD
- .addImm(1); // NEG
- MI->eraseFromParent();
- break;
case AMDGPU::SHADER_TYPE:
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
MI->getOperand(0).getImm();
@@ -128,29 +79,13 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::SI_INTERP:
LowerSI_INTERP(MI, *BB, I, MRI);
break;
- case AMDGPU::SI_INTERP_CONST:
- LowerSI_INTERP_CONST(MI, *BB, I, MRI);
- break;
- case AMDGPU::SI_KIL:
- LowerSI_KIL(MI, *BB, I, MRI);
- break;
case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI);
break;
- case AMDGPU::SI_V_CNDLT:
- LowerSI_V_CNDLT(MI, *BB, I, MRI);
- break;
}
return BB;
}
-void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I) const {
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
-}
-
-
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
@@ -190,57 +125,6 @@ void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MI->eraseFromParent();
}
-void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
- MachineBasicBlock &BB, MachineBasicBlock::iterator I,
- MachineRegisterInfo &MRI) const {
- MachineOperand dst = MI->getOperand(0);
- MachineOperand attr_chan = MI->getOperand(1);
- MachineOperand attr = MI->getOperand(2);
- MachineOperand params = MI->getOperand(3);
- unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
- .addOperand(params);
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
- .addOperand(dst)
- .addOperand(attr_chan)
- .addOperand(attr)
- .addReg(M0);
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- // Clear this pixel from the exec mask if the operand is negative
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
- AMDGPU::VCC)
- .addReg(AMDGPU::SREG_LIT_0)
- .addOperand(MI->getOperand(0));
-
- MI->eraseFromParent();
-}
-
-void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
- unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-
- BuildMI(BB, I, BB.findDebugLoc(I),
- TII->get(AMDGPU::V_CMP_GT_F32_e32),
- VCC)
- .addReg(AMDGPU::SREG_LIT_0)
- .addOperand(MI->getOperand(1));
-
- BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(2))
- .addReg(VCC);
-
- MI->eraseFromParent();
-}
-
EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
@@ -255,7 +139,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -272,30 +155,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-/// \brief The function is for lowering i1 operations on the
-/// VCC register.
-///
-/// In the VALU context, VCC is a one bit register, but in the
-/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
-/// the SALU can perform operations on the VCC register, we need to promote
-/// the operand types from i1 to i64 in order for tablegen to be able to match
-/// this operation to the correct SALU instruction. We do this promotion by
-/// wrapping the operands in a CopyToReg node.
-///
-SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
- SelectionDAG &DAG,
- unsigned VCCNode) const {
- DebugLoc DL = Op.getDebugLoc();
-
- SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
- DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
- Op.getOperand(0)),
- DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
- Op.getOperand(1)));
-
- return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
-}
-
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -500,12 +359,252 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-#define NODE_NAME_CASE(node) case SIISD::node: return #node;
+/// \brief Test if RegClass is one of the VSrc classes
+static bool isVSrc(unsigned RegClass) {
+ return AMDGPU::VSrc_32RegClassID == RegClass ||
+ AMDGPU::VSrc_64RegClassID == RegClass;
+}
+
+/// \brief Test if RegClass is one of the SSrc classes
+static bool isSSrc(unsigned RegClass) {
+ return AMDGPU::SSrc_32RegClassID == RegClass ||
+ AMDGPU::SSrc_64RegClassID == RegClass;
+}
+
+/// \brief Analyze the possible immediate value Op
+///
+/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate,
+/// and the immediate value if it's a literal immediate
+int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
+
+ union {
+ int32_t I;
+ float F;
+ } Imm;
+
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+ Imm.I = Node->getSExtValue();
+ else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ Imm.F = Node->getValueAPF().convertToFloat();
+ else
+ return -1; // It isn't an immediate
+
+ if ((Imm.I >= -16 && Imm.I <= 64) ||
+ Imm.F == 0.5f || Imm.F == -0.5f ||
+ Imm.F == 1.0f || Imm.F == -1.0f ||
+ Imm.F == 2.0f || Imm.F == -2.0f ||
+ Imm.F == 4.0f || Imm.F == -4.0f)
+ return 0; // It's an inline immediate
+
+ return Imm.I; // It's a literal immediate
+}
+
+/// \brief Try to fold an immediate directly into an instruction
+bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const {
+
+ MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
+ if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ return false;
+
+ const SDValue &Op = Mov->getOperand(0);
+ int32_t Value = analyzeImmediate(Op.getNode());
+ if (Value == -1) {
+ // Not an immediate at all
+ return false;
+
+ } else if (Value == 0) {
+ // Inline immediates can always be folded
+ Operand = Op;
+ return true;
+
+ } else if (Value == Immediate) {
+ // Already folded literal immediate
+ Operand = Op;
+ return true;
+
+ } else if (!ScalarSlotUsed && !Immediate) {
+ // Fold this literal immediate
+ ScalarSlotUsed = true;
+ Immediate = Value;
+ Operand = Op;
+ return true;
-const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
- NODE_NAME_CASE(VCC_AND)
- NODE_NAME_CASE(VCC_BITCAST)
}
+
+ return false;
+}
+
+/// \brief Does "Op" fit into register class "RegClass" ?
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
+ unsigned RegClass) const {
+
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDNode *Node = Op.getNode();
+
+ int OpClass;
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
+ const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
+ OpClass = Desc.OpInfo[Op.getResNo()].RegClass;
+
+ } else if (Node->getOpcode() == ISD::CopyFromReg) {
+ RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
+ OpClass = MRI.getRegClass(Reg->getReg())->getID();
+
+ } else
+ return false;
+
+ if (OpClass == -1)
+ return false;
+
+ return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass));
+}
+
+/// \brief Make sure that we don't exceed the number of allowed scalars
+void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass,
+ bool &ScalarSlotUsed) const {
+
+ // First map the operands register class to a destination class
+ if (RegClass == AMDGPU::VSrc_32RegClassID)
+ RegClass = AMDGPU::VReg_32RegClassID;
+ else if (RegClass == AMDGPU::VSrc_64RegClassID)
+ RegClass = AMDGPU::VReg_64RegClassID;
+ else
+ return;
+
+ // Nothing to do if they fit naturally
+ if (fitsRegClass(DAG, Operand, RegClass))
+ return;
+
+ // If the scalar slot isn't used yet use it now
+ if (!ScalarSlotUsed) {
+ ScalarSlotUsed = true;
+ return;
+ }
+
+ // This is a conservative approach; it is possible that we can't determine
+ // the correct register class and copy too often, but better safe than sorry.
+ SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
+ SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
+ Operand.getValueType(), Operand, RC);
+ Operand = SDValue(Node, 0);
+}
+
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ // Original encoding (either e32 or e64)
+ int Opcode = Node->getMachineOpcode();
+ const MCInstrDesc *Desc = &TII->get(Opcode);
+
+ unsigned NumDefs = Desc->getNumDefs();
+ unsigned NumOps = Desc->getNumOperands();
+
+ // e64 version if available, -1 otherwise
+ int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+
+ assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
+ assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
+
+ int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
+ bool HaveVSrc = false, HaveSSrc = false;
+
+ // First figure out what we already have in this instruction
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (isVSrc(RegClass))
+ HaveVSrc = true;
+ else if (isSSrc(RegClass))
+ HaveSSrc = true;
+ else
+ continue;
+
+ int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
+ if (Imm != -1 && Imm != 0) {
+ // Literal immediate
+ Immediate = Imm;
+ }
+ }
+
+ // If we neither have VSrc nor SSrc it makes no sense to continue
+ if (!HaveVSrc && !HaveSSrc)
+ return Node;
+
+ // No scalar allowed when we have both VSrc and SSrc
+ bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
+
+ // Second go over the operands and try to fold them
+ std::vector<SDValue> Ops;
+ bool Promote2e64 = false;
+ for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
+ i != e && Op < NumOps; ++i, ++Op) {
+
+ const SDValue &Operand = Node->getOperand(i);
+ Ops.push_back(Operand);
+
+ // Already folded immediate ?
+ if (isa<ConstantSDNode>(Operand.getNode()) ||
+ isa<ConstantFPSDNode>(Operand.getNode()))
+ continue;
+
+    // Is this a VSrc or SSrc operand?
+ unsigned RegClass = Desc->OpInfo[Op].RegClass;
+ if (!isVSrc(RegClass) && !isSSrc(RegClass)) {
+
+ if (i == 1 && Desc->isCommutable() &&
+ fitsRegClass(DAG, Ops[0], RegClass) &&
+ foldImm(Ops[1], Immediate, ScalarSlotUsed)) {
+
+ assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) ||
+ isSSrc(Desc->OpInfo[NumDefs].RegClass));
+
+ // Swap commutable operands
+ SDValue Tmp = Ops[1];
+ Ops[1] = Ops[0];
+ Ops[0] = Tmp;
+
+ } else if (DescE64 && !Immediate) {
+ // Test if it makes sense to switch to e64 encoding
+
+ RegClass = DescE64->OpInfo[Op].RegClass;
+ int32_t TmpImm = -1;
+ if ((isVSrc(RegClass) || isSSrc(RegClass)) &&
+ foldImm(Ops[i], TmpImm, ScalarSlotUsed)) {
+
+ Immediate = -1;
+ Promote2e64 = true;
+ Desc = DescE64;
+ DescE64 = 0;
+ }
+ }
+ continue;
+ }
+
+ // Try to fold the immediates
+ if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
+      // Folding didn't work; make sure we don't hit the SReg limit
+ ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
+ }
+ }
+
+ if (Promote2e64) {
+ // Add the modifier flags while promoting
+ for (unsigned i = 0; i < 4; ++i)
+ Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+ }
+
+ // Add optional chain and glue
+ for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+  // Either create a completely new instruction or update the current one
+ if (Promote2e64)
+ return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(),
+ Node->getVTList(), Ops.data(), Ops.size());
+ else
+ return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index c088112..737162f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -22,31 +22,25 @@ namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
const SIInstrInfo * TII;
+ const TargetRegisterInfo * TRI;
- /// Memory reads and writes are syncronized using the S_WAITCNT instruction.
- /// This function takes the most conservative approach and inserts an
- /// S_WAITCNT instruction after every read and write.
- void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I) const;
void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, unsigned Opocde) const;
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
- void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
- MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
- SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
- unsigned VCCNode) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ bool foldImm(SDValue &Operand, int32_t &Immediate,
+ bool &ScalarSlotUsed) const;
+ bool fitsRegClass(SelectionDAG &DAG, SDValue &Op, unsigned RegClass) const;
+ void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
+ unsigned RegClass, bool &ScalarSlotUsed) const;
+
public:
SITargetLowering(TargetMachine &tm);
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
@@ -54,7 +48,9 @@ public:
virtual EVT getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- virtual const char* getTargetNodeName(unsigned Opcode) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+
+ int32_t analyzeImmediate(const SDNode *N) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
new file mode 100644
index 0000000..24fc929
--- /dev/null
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -0,0 +1,353 @@
+//===-- SIInsertWaits.cpp - Insert waits for memory operations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters
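+/// (VM counts vector memory operations, EXP counts exports and memory writes,
+/// LGKM covers LDS, GDS, constant (scalar memory) and message operations.)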
+typedef union {
+ struct {
+ unsigned VM;
+ unsigned EXP;
+ unsigned LGKM;
+ } Named;
+ unsigned Array[3];
+
+} Counters;
+
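+/// \brief One counter snapshot per hardware register encoding value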
+typedef Counters RegCounters[512];
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo &TRI;
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Constant hardware limits
+ static const Counters WaitCounts;
+
+ /// \brief Constant zero value
+ static const Counters ZeroCounts;
+
+ /// \brief Counter values we have already waited on.
+ Counters WaitedOn;
+
+ /// \brief Counter values for last instruction issued.
+ Counters LastIssued;
+
+ /// \brief Registers used by async instructions.
+ RegCounters UsedRegs;
+
+ /// \brief Registers defined by async instructions.
+ RegCounters DefinedRegs;
+
+ /// \brief Different export instruction types seen since last wait.
+ unsigned ExpInstrTypesSeen;
+
+ /// \brief Get increment/decrement amount for this instruction.
+ Counters getHwCounts(MachineInstr &MI);
+
+ /// \brief Is operand relevant for async execution?
+ bool isOpRelevant(MachineOperand &Op);
+
+ /// \brief Get register interval an operand affects.
+ RegInterval getRegInterval(MachineOperand &Op);
+
+  /// \brief Handle an instruction's async components
+ void pushInstruction(MachineInstr &MI);
+
+ /// \brief Insert the actual wait instruction
+ bool insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Counts);
+
+ /// \brief Resolve all operand dependencies to counter requirements
+ Counters handleOperands(MachineInstr &MI);
+
+public:
+ SIInsertWaits(TargetMachine &tm) :
+ MachineFunctionPass(ID),
+ TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
+ TRI(TII->getRegisterInfo()) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "SI insert wait instructions";
+ }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0;
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
+
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+ return new SIInsertWaits(tm);
+}
+
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+ uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ Counters Result;
+
+ Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
+
+ // Only consider stores or EXP for EXP_CNT
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
+ (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore()));
+
+  // LGKM may use larger values
+ if (TSFlags & SIInstrFlags::LGKM_CNT) {
+
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ Result.Named.LGKM = 0;
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+ // Constants are always irrelevant
+ if (!Op.isReg())
+ return false;
+
+ // Defines are always relevant
+ if (Op.isDef())
+ return true;
+
+ // For exports all registers are relevant
+ MachineInstr &MI = *Op.getParent();
+ if (MI.getOpcode() == AMDGPU::EXP)
+ return true;
+
+ // For stores the stored value is also relevant
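+  // (the stored value is taken to be the first register use operand)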
+ if (!MI.getDesc().mayStore())
+ return false;
+
+ for (MachineInstr::mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
+
+ if (I->isReg() && I->isUse())
+ return Op.isIdenticalTo(*I);
+ }
+
+ return false;
+}
+
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+ if (!Op.isReg())
+ return std::make_pair(0, 0);
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI.getMinimalPhysRegClass(Reg)->getSize();
+
+ assert(Size >= 4);
+
+ RegInterval Result;
+ Result.first = TRI.getEncodingValue(Reg);
+ Result.second = Result.first + Size / 4;
+
+ return Result;
+}
+
+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+
+ // Get the hardware counter increments and sum them up
+ Counters Increment = getHwCounts(MI);
+ unsigned Sum = 0;
+
+ for (unsigned i = 0; i < 3; ++i) {
+ LastIssued.Array[i] += Increment.Array[i];
+ Sum += Increment.Array[i];
+ }
+
+ // If we don't increase anything then that's it
+ if (Sum == 0)
+ return;
+
+ // Remember which export instructions we have seen
+ if (Increment.Named.EXP) {
+ ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
+ }
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ if (!isOpRelevant(Op))
+ continue;
+
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ // Remember which registers we define
+ if (Op.isDef())
+ DefinedRegs[j] = LastIssued;
+
+      // and which ones we are using
+ if (Op.isUse())
+ UsedRegs[j] = LastIssued;
+ }
+ }
+}
+
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const Counters &Required) {
+
+ // End of program? No need to wait on anything
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ return false;
+
+ // Figure out if the async instructions execute in order
+ bool Ordered[3];
+
+ // VM_CNT is always ordered
+ Ordered[0] = true;
+
+ // EXP_CNT is unordered if we have both EXP & VM-writes
+ Ordered[1] = ExpInstrTypesSeen == 3;
+
+ // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+ Ordered[2] = false;
+
+ // The values we are going to put into the S_WAITCNT instruction
+ Counters Counts = WaitCounts;
+
+ // Do we really need to wait?
+ bool NeedWait = false;
+
+ for (unsigned i = 0; i < 3; ++i) {
+
+ if (Required.Array[i] <= WaitedOn.Array[i])
+ continue;
+
+ NeedWait = true;
+
+ if (Ordered[i]) {
+ unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+      // Adjust the value to the real hardware possibilities
+ Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+ } else
+ Counts.Array[i] = 0;
+
+    // Remember what we have waited on
+ WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+ }
+
+ if (!NeedWait)
+ return false;
+
+ // Reset EXP_CNT instruction types
+ if (Counts.Named.EXP == 0)
+ ExpInstrTypesSeen = 0;
+
+ // Build the wait instruction
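+  // simm16 layout used below: VM_CNT in bits 3:0, EXP_CNT in bits 6:4,
+  // LGKM_CNT in bits 10:8.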
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm((Counts.Named.VM & 0xF) |
+ ((Counts.Named.EXP & 0x7) << 4) |
+ ((Counts.Named.LGKM & 0x7) << 8));
+
+ return true;
+}
+
+/// \brief helper function for handleOperands
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+ for (unsigned i = 0; i < 3; ++i)
+ Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
+}
+
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+ Counters Result = ZeroCounts;
+
+  // For each register affected by this instruction,
+  // raise the result counters to the values we need to wait for
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+
+ MachineOperand &Op = MI.getOperand(i);
+ RegInterval Interval = getRegInterval(Op);
+ for (unsigned j = Interval.first; j < Interval.second; ++j) {
+
+ if (Op.isDef())
+ increaseCounters(Result, UsedRegs[j]);
+
+ if (Op.isUse())
+ increaseCounters(Result, DefinedRegs[j]);
+ }
+ }
+
+ return Result;
+}
+
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+
+ bool Changes = false;
+
+ MRI = &MF.getRegInfo();
+
+ WaitedOn = ZeroCounts;
+ LastIssued = ZeroCounts;
+
+ memset(&UsedRegs, 0, sizeof(UsedRegs));
+ memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+
+ Changes |= insertWait(MBB, I, handleOperands(*I));
+ pushInstruction(*I);
+ }
+
+ // Wait for everything at the end of the MBB
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+ }
+
+ return Changes;
+}
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index aea3b5a..fe417d6 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -1,4 +1,4 @@
-//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
+//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,138 +9,418 @@
//
// SI Instruction format definitions.
//
-// Instructions with _32 take 32-bit operands.
-// Instructions with _64 take 64-bit operands.
-//
-// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
-// encoding is the standard encoding, but instruction that make use of
-// any of the instruction modifiers must use the 64-bit encoding.
-//
-// Instructions with _e32 use the 32-bit encoding.
-// Instructions with _e64 use the 64-bit encoding.
-//
//===----------------------------------------------------------------------===//
-class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass,
- RegisterClass src0Class, RegisterClass src1Class,
- list<dag> pattern>
- : VOP3b <op, (outs dstClass:$vdst),
- (ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst,
- InstFlag:$omod, InstFlag:$neg),
- opName, pattern
->;
+class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
+ AMDGPUInst<outs, ins, asm, pattern> {
+ field bits<1> VM_CNT = 0;
+ field bits<1> EXP_CNT = 0;
+ field bits<1> LGKM_CNT = 0;
-class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern>
- : VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>;
+ let TSFlags{0} = VM_CNT;
+ let TSFlags{1} = EXP_CNT;
+ let TSFlags{2} = LGKM_CNT;
+}
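+// These TSFlags bits are mirrored by the SIInstrFlags enum in SIInstrInfo.h
+// and are read by the SIInsertWaits pass.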
-class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
- : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
-class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
- : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
+ field bits<32> Inst;
+ let Size = 4;
+}
+class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
-class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
- : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
+ field bits<64> Inst;
+ let Size = 8;
+}
-class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
- : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
+//===----------------------------------------------------------------------===//
+// Scalar operations
+//===----------------------------------------------------------------------===//
-class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
-class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+ bits<7> SDST;
+ bits<8> SSRC0;
-class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
- : SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = op;
+ let Inst{22-16} = SDST;
+ let Inst{31-23} = 0x17d; //encoding;
-class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOP1 <
- op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
- >;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
-multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
- def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
- def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
+class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<8> SSRC0;
+ bits<8> SSRC1;
+
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = SDST;
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
}
-multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
+class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32<outs, ins, asm, pattern> {
+
+ bits<8> SSRC0;
+ bits<8> SSRC1;
- def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+ let Inst{7-0} = SSRC0;
+ let Inst{15-8} = SSRC1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
- def _e64 : VOP3_64 <
- {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
}
-class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOP2 <
- op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
- >;
+class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins , asm, pattern> {
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
+ bits <7> SDST;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = SDST;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
- def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
+ (outs),
+ ins,
+ asm,
+ pattern > {
- def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
+ bits <16> SIMM16;
+
+ let Inst{15-0} = SIMM16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
}
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
- def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
+ list<dag> pattern> : Enc32<outs, ins, asm, pattern> {
+
+ bits<7> SDST;
+ bits<6> SBASE;
+ bits<8> OFFSET;
+
+ let Inst{7-0} = OFFSET;
+ let Inst{8} = imm;
+ let Inst{14-9} = SBASE;
+ let Inst{21-15} = SDST;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+
+ let LGKM_CNT = 1;
+}
- def _e64 : VOP3_64 <
- {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
+//===----------------------------------------------------------------------===//
+// Vector ALU operations
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+
+class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = op;
+ let Inst{24-17} = VDST;
+ let Inst{31-25} = 0x3f; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = VDST;
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
}
-class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
- : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
+class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<3> ABS;
+ bits<1> CLAMP;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{10-8} = ABS;
+ let Inst{11} = CLAMP;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDST;
+ bits<9> SRC0;
+ bits<9> SRC1;
+ bits<9> SRC2;
+ bits<7> SDST;
+ bits<2> OMOD;
+ bits<3> NEG;
+
+ let Inst{7-0} = VDST;
+ let Inst{14-8} = SDST;
+ let Inst{25-17} = op;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = SRC0;
+ let Inst{49-41} = SRC1;
+ let Inst{58-50} = SRC2;
+ let Inst{60-59} = OMOD;
+ let Inst{63-61} = NEG;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
-class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
- : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
+class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
+ Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+
+ bits<9> SRC0;
+ bits<8> VSRC1;
+
+ let Inst{8-0} = SRC0;
+ let Inst{16-9} = VSRC1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+
+ let DisableEncoding = "$dst";
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
-class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, list<dag> pattern> :
- VOPC <
- op, (ins arc:$src0, vrc:$src1), opName, pattern
- >;
+class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc32 <outs, ins, asm, pattern> {
-multiclass VOPC_32 <bits<9> op, string opName, list<dag> pattern> {
+ bits<8> VDST;
+ bits<8> VSRC;
+ bits<2> ATTRCHAN;
+ bits<6> ATTR;
- def _e32 : VOPC_Helper <
- {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- VReg_32, AllReg_32, opName, pattern
- >;
+ let Inst{7-0} = VSRC;
+ let Inst{9-8} = ATTRCHAN;
+ let Inst{15-10} = ATTR;
+ let Inst{17-16} = op;
+ let Inst{25-18} = VDST;
+ let Inst{31-26} = 0x32; // encoding
- def _e64 : VOP3_1_32 <
- op,
- opName, pattern
- >;
+ let neverHasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 0;
}
-multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
+} // End Uses = [EXEC]
- def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
+//===----------------------------------------------------------------------===//
+// Vector I/O operations
+//===----------------------------------------------------------------------===//
- def _e64 : VOP3_64 <
- {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
- opName, []
- >;
+let Uses = [EXEC] in {
+
+class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<1> LDS;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{16} = LDS;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
}
-class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
- : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
+class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64<outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<12> OFFSET;
+ bits<1> OFFEN;
+ bits<1> IDXEN;
+ bits<1> GLC;
+ bits<1> ADDR64;
+ bits<4> DFMT;
+ bits<3> NFMT;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<1> SLC;
+ bits<1> TFE;
+ bits<8> SOFFSET;
+
+ let Inst{11-0} = OFFSET;
+ let Inst{12} = OFFEN;
+ let Inst{13} = IDXEN;
+ let Inst{14} = GLC;
+ let Inst{15} = ADDR64;
+ let Inst{18-16} = op;
+ let Inst{22-19} = DFMT;
+ let Inst{25-23} = NFMT;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{54} = SLC;
+ let Inst{55} = TFE;
+ let Inst{63-56} = SOFFSET;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+
+ let neverHasSideEffects = 1;
+}
-class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
- : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
+class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ bits<8> VDATA;
+ bits<4> DMASK;
+ bits<1> UNORM;
+ bits<1> GLC;
+ bits<1> DA;
+ bits<1> R128;
+ bits<1> TFE;
+ bits<1> LWE;
+ bits<1> SLC;
+ bits<8> VADDR;
+ bits<5> SRSRC;
+ bits<5> SSAMP;
+
+ let Inst{11-8} = DMASK;
+ let Inst{12} = UNORM;
+ let Inst{13} = GLC;
+ let Inst{14} = DA;
+ let Inst{15} = R128;
+ let Inst{16} = TFE;
+ let Inst{17} = LWE;
+ let Inst{24-18} = op;
+ let Inst{25} = SLC;
+ let Inst{31-26} = 0x3c;
+ let Inst{39-32} = VADDR;
+ let Inst{47-40} = VDATA;
+ let Inst{52-48} = SRSRC;
+ let Inst{57-53} = SSAMP;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+}
+
+def EXP : Enc64<
+ (outs),
+ (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
+ VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
+ [] > {
+
+ bits<4> EN;
+ bits<6> TGT;
+ bits<1> COMPR;
+ bits<1> DONE;
+ bits<1> VM;
+ bits<8> VSRC0;
+ bits<8> VSRC1;
+ bits<8> VSRC2;
+ bits<8> VSRC3;
+
+ let Inst{3-0} = EN;
+ let Inst{9-4} = TGT;
+ let Inst{10} = COMPR;
+ let Inst{11} = DONE;
+ let Inst{12} = VM;
+ let Inst{31-26} = 0x3e;
+ let Inst{39-32} = VSRC0;
+ let Inst{47-40} = VSRC1;
+ let Inst{55-48} = VSRC2;
+ let Inst{63-56} = VSRC3;
+
+ let EXP_CNT = 1;
+}
+} // End Uses = [EXEC]
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index c6ad4d5..d9dbd6a 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -41,7 +41,15 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// never be necessary.
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
- if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0))
+ .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc))
+ .addReg(DestReg, RegState::Define | RegState::Implicit);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1))
+ .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc));
+ } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -58,9 +66,19 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
+MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
+ bool NewMI) const {
+
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
+ !MI->getOperand(2).isReg())
+ return 0;
+
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+}
+
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
+ MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
MachineInstrBuilder MIB(*MF, MI);
MIB.addReg(DstReg, RegState::Define);
MIB.addImm(Imm);
@@ -76,9 +94,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const {
case AMDGPU::S_MOV_B64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_IMM_F32:
- case AMDGPU::V_MOV_IMM_I32:
- case AMDGPU::S_MOV_IMM_I32:
return true;
}
}
@@ -87,3 +102,51 @@ bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
+
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
+}
+
+
+int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+MachineInstrBuilder SIInstrInfo::buildIndirectRead(
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address, unsigned OffsetReg) const {
+ llvm_unreachable("Unimplemented");
+}
+
+const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
+ llvm_unreachable("Unimplemented");
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 631f6c0..5789af5 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -35,11 +35,8 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- /// \returns the encoding type of this instruction.
- unsigned getEncodingType(const MachineInstr &MI) const;
-
- /// \returns the size of this instructions encoding in number of bytes.
- unsigned getEncodingBytes(const MachineInstr &MI) const;
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI=false) const;
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
int64_t Imm) const;
@@ -48,14 +45,48 @@ public:
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
+ virtual unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
+ unsigned SourceReg) const;
+
+ virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+
+ virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const;
+
+ virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
};
+namespace AMDGPU {
+
+ int getVOPe64(uint16_t Opcode);
+
+} // End namespace AMDGPU
+
} // End namespace llvm
namespace SIInstrFlags {
enum Flags {
// First 4 bits are the instruction encoding
- NEED_WAIT = 1 << 4
+ VM_CNT = 1 << 0,
+ EXP_CNT = 1 << 1,
+ LGKM_CNT = 1 << 2
};
}
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 873a451..d6c3f06 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
+//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,521 +8,280 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SI DAG Profiles
-//===----------------------------------------------------------------------===//
-def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
- SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
-]>;
-
-//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//
-// and operation on 64-bit wide vcc
-def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// Special bitcast node for sharing VCC register between VALU and SALU
-def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST",
- SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
+// SMRD takes a 64-bit memory address and can only add a 32-bit offset
+def SIadd64bit32bit : SDNode<"ISD::ADD",
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
>;
-// and operation on 64-bit wide vcc
-def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
- [SDNPCommutative, SDNPAssociative]
+// Transformation function: extract the lower 32 bits of a 64-bit immediate
+def LO32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
+}]>;
+
+// Transformation function: extract the upper 32 bits of a 64-bit immediate
+def HI32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
+}]>;
+
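+// Matches byte offsets that are a multiple of 4 and no larger than 1020
+// (i.e. only bits 2-9 may be set); the transform re-encodes them in dwords.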
+def IMM8bitDWORD : ImmLeaf <
+ i32, [{
+ return (Imm & ~0x3FC) == 0;
+ }], SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() >> 2, MVT::i32);
+ }]>
>;
-// Special bitcast node for sharing VCC register between VALU and SALU
-def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
- SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
->;
-
-class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst<outs, ins, asm, pattern> {
-
- field bits<4> EncodingType = 0;
- field bits<1> NeedWait = 0;
-
- let TSFlags{3-0} = EncodingType;
- let TSFlags{4} = NeedWait;
-
-}
-
-class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- field bits<32> Inst;
-}
-
-class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- field bits<64> Inst;
-}
-
-class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
- let EncoderMethod = "encodeOperand";
- let MIOperandInfo = opInfo;
-}
-
-def IMM16bit : ImmLeaf <
+def IMM12bit : ImmLeaf <
i16,
- [{return isInt<16>(Imm);}]
+ [{return isUInt<12>(Imm);}]
>;
-def IMM8bit : ImmLeaf <
- i32,
- [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
->;
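+// Matches immediates that analyzeImmediate classifies as inline constants
+// (return value 0), i.e. values that need no extra literal dword.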
+class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
+ return ((const SITargetLowering &)TLI).analyzeImmediate(N) == 0;
+}]>;
-def IMM12bit : ImmLeaf <
- i16,
- [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
->;
+//===----------------------------------------------------------------------===//
+// SI assembler operands
+//===----------------------------------------------------------------------===//
-def IMM32bitIn64bit : ImmLeaf <
- i64,
- [{return isInt<32>(Imm);}]
->;
+def SIOperand {
+ int ZERO = 0x80;
+ int VCC = 0x6A;
+}
class GPR4Align <RegisterClass rc> : Operand <vAny> {
let EncoderMethod = "GPR4AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
-class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
+class GPR2Align <RegisterClass rc> : Operand <iPTR> {
let EncoderMethod = "GPR2AlignEncode";
let MIOperandInfo = (ops rc:$reg);
}
-def SMRDmemrr : Operand<iPTR> {
- let MIOperandInfo = (ops SReg_64, SReg_32);
- let EncoderMethod = "GPR2AlignEncode";
-}
-
-def SMRDmemri : Operand<iPTR> {
- let MIOperandInfo = (ops SReg_64, i32imm);
- let EncoderMethod = "SMRDmemriEncode";
-}
-
-def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
-def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
-
-let Uses = [EXEC] in {
-
-def EXP : Enc64<
- (outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
- "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
-
- bits<4> EN;
- bits<6> TGT;
- bits<1> COMPR;
- bits<1> DONE;
- bits<1> VM;
- bits<8> VSRC0;
- bits<8> VSRC1;
- bits<8> VSRC2;
- bits<8> VSRC3;
-
- let Inst{3-0} = EN;
- let Inst{9-4} = TGT;
- let Inst{10} = COMPR;
- let Inst{11} = DONE;
- let Inst{12} = VM;
- let Inst{31-26} = 0x3e;
- let Inst{39-32} = VSRC0;
- let Inst{47-40} = VSRC1;
- let Inst{55-48} = VSRC2;
- let Inst{63-56} = VSRC3;
- let EncodingType = 0; //SIInstrEncodingType::EXP
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
-
-class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<4> DMASK;
- bits<1> UNORM;
- bits<1> GLC;
- bits<1> DA;
- bits<1> R128;
- bits<1> TFE;
- bits<1> LWE;
- bits<1> SLC;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<5> SSAMP;
-
- let Inst{11-8} = DMASK;
- let Inst{12} = UNORM;
- let Inst{13} = GLC;
- let Inst{14} = DA;
- let Inst{15} = R128;
- let Inst{16} = TFE;
- let Inst{17} = LWE;
- let Inst{24-18} = op;
- let Inst{25} = SLC;
- let Inst{31-26} = 0x3c;
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{57-53} = SSAMP;
-
- let EncodingType = 2; //SIInstrEncodingType::MIMG
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
-
-class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<4> DFMT;
- bits<3> NFMT;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
- let Inst{18-16} = op;
- let Inst{22-19} = DFMT;
- let Inst{25-23} = NFMT;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
- let EncodingType = 3; //SIInstrEncodingType::MTBUF
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
- let neverHasSideEffects = 1;
-}
-
-class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64<outs, ins, asm, pattern> {
-
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<1> LDS;
- bits<8> VADDR;
- bits<5> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
- let Inst{16} = LDS;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC;
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
- let EncodingType = 4; //SIInstrEncodingType::MUBUF
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
- let neverHasSideEffects = 1;
-}
+include "SIInstrFormats.td"
-} // End Uses = [EXEC]
-
-class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
-
- bits<7> SDST;
- bits<15> PTR;
- bits<8> OFFSET = PTR{7-0};
- bits<1> IMM = PTR{8};
- bits<6> SBASE = PTR{14-9};
-
- let Inst{7-0} = OFFSET;
- let Inst{8} = IMM;
- let Inst{14-9} = SBASE;
- let Inst{21-15} = SDST;
- let Inst{26-22} = op;
- let Inst{31-27} = 0x18; //encoding
- let EncodingType = 5; //SIInstrEncodingType::SMRD
-
- let NeedWait = 1;
- let usesCustomInserter = 1;
-}
+//===----------------------------------------------------------------------===//
+//
+// SI Instruction multiclass helpers.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
+//
+//===----------------------------------------------------------------------===//
-class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
+//===----------------------------------------------------------------------===//
+// Scalar classes
+//===----------------------------------------------------------------------===//
- bits<7> SDST;
- bits<8> SSRC0;
+class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0),
+ opName#" $dst, $src0", pattern
+>;
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = op;
- let Inst{22-16} = SDST;
- let Inst{31-23} = 0x17d; //encoding;
- let EncodingType = 6; //SIInstrEncodingType::SOP1
+class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
+class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
-class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<7> SDST;
- bits<8> SSRC0;
- bits<8> SSRC1;
+class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = SDST;
- let Inst{29-23} = op;
- let Inst{31-30} = 0x2; // encoding
- let EncodingType = 7; // SIInstrEncodingType::SOP2
+class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
+class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
+ op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
-class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32<outs, ins, asm, pattern> {
+class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_32:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
- bits<8> SSRC0;
- bits<8> SSRC1;
+class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
+ op, (outs SReg_64:$dst), (ins i16imm:$src0),
+ opName#" $dst, $src0", pattern
+>;
- let Inst{7-0} = SSRC0;
- let Inst{15-8} = SSRC1;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17e;
- let EncodingType = 8; // SIInstrEncodingType::SOPC
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
+ def _IMM : SMRD <
+ op, 1, (outs dstClass:$dst),
+ (ins GPR2Align<SReg_64>:$sbase, i32imm:$offset),
+ asm#" $dst, $sbase, $offset", []
+ >;
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+ def _SGPR : SMRD <
+ op, 0, (outs dstClass:$dst),
+ (ins GPR2Align<SReg_64>:$sbase, SReg_32:$soff),
+ asm#" $dst, $sbase, $soff", []
+ >;
}
-class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins , asm, pattern> {
-
- bits <7> SDST;
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = SDST;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb; //encoding
- let EncodingType = 9; // SIInstrEncodingType::SOPK
+//===----------------------------------------------------------------------===//
+// Vector ALU classes
+//===----------------------------------------------------------------------===//
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+class VOP <string opName> {
+ string OpName = opName;
}
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
- (outs),
- ins,
- asm,
- pattern > {
-
- bits <16> SIMM16;
-
- let Inst{15-0} = SIMM16;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
- let EncodingType = 10; // SIInstrEncodingType::SOPP
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
-
-let Uses = [EXEC] in {
-
-class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<8> VSRC;
- bits<2> ATTRCHAN;
- bits<6> ATTR;
-
- let Inst{7-0} = VSRC;
- let Inst{9-8} = ATTRCHAN;
- let Inst{15-10} = ATTR;
- let Inst{17-16} = op;
- let Inst{25-18} = VDST;
- let Inst{31-26} = 0x32; // encoding
- let EncodingType = 11; // SIInstrEncodingType::VINTRP
-
- let neverHasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 0;
+multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
+ string opName, list<dag> pattern> {
+
+ def _e32 : VOP1 <
+ op, (outs drc:$dst), (ins src:$src0),
+ opName#"_e32 $dst, $src0", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs drc:$dst),
+ (ins src:$src0,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC1 = SIOperand.ZERO;
+ let SRC2 = SIOperand.ZERO;
+ }
}
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = op;
- let Inst{24-17} = VDST;
- let Inst{31-25} = 0x3f; //encoding
-
- let EncodingType = 12; // SIInstrEncodingType::VOP1
- let PostEncoderMethod = "VOPPostEncode";
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> {
+ def _e32 : VOP2 <
+ op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs vrc:$dst),
+ (ins arc:$src0, arc:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+ }
}
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc32 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
- bits<8> VSRC1;
-
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = VDST;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-
- let EncodingType = 13; // SIInstrEncodingType::VOP2
- let PostEncoderMethod = "VOPPostEncode";
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern>
+ : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
+multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern>
+ : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> {
+
+ def _e32 : VOP2 <
+ op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
+ opName#"_e32 $dst, $src0, $src1", pattern
+ >, VOP <opName>;
+
+ def _e64 : VOP3b <
+ {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1,
+ i32imm:$abs, i32imm:$clamp,
+ i32imm:$omod, i32imm:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+    /* The VOP2 variant puts the carry out into VCC, while the VOP3 variant
+       can write it into any SGPR. We currently don't use the carry out,
+       so for now hardcode it to VCC as well. */
+ let SDST = SIOperand.VCC;
+ }
}
-class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
- bits<9> SRC1;
- bits<9> SRC2;
- bits<3> ABS;
- bits<1> CLAMP;
- bits<2> OMOD;
- bits<3> NEG;
-
- let Inst{7-0} = VDST;
- let Inst{10-8} = ABS;
- let Inst{11} = CLAMP;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = SRC0;
- let Inst{49-41} = SRC1;
- let Inst{58-50} = SRC2;
- let Inst{60-59} = OMOD;
- let Inst{63-61} = NEG;
-
- let EncodingType = 14; // SIInstrEncodingType::VOP3
- let PostEncoderMethod = "VOPPostEncode";
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
+ string opName, ValueType vt, PatLeaf cond> {
+
+ def _e32 : VOPC <
+ op, (ins arc:$src0, vrc:$src1),
+ opName#"_e32 $dst, $src0, $src1", []
+ >, VOP <opName>;
+
+ def _e64 : VOP3 <
+ {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+ (outs SReg_64:$dst),
+ (ins arc:$src0, arc:$src1,
+ InstFlag:$abs, InstFlag:$clamp,
+ InstFlag:$omod, InstFlag:$neg),
+ opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
+ !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
+ [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
+ )
+ >, VOP <opName> {
+ let SRC2 = SIOperand.ZERO;
+ }
}
-class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
-
- bits<8> VDST;
- bits<9> SRC0;
- bits<9> SRC1;
- bits<9> SRC2;
- bits<7> SDST;
- bits<2> OMOD;
- bits<3> NEG;
-
- let Inst{7-0} = VDST;
- let Inst{14-8} = SDST;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = SRC0;
- let Inst{49-41} = SRC1;
- let Inst{58-50} = SRC2;
- let Inst{60-59} = OMOD;
- let Inst{63-61} = NEG;
-
- let EncodingType = 14; // SIInstrEncodingType::VOP3
- let PostEncoderMethod = "VOPPostEncode";
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
+multiclass VOPC_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
-class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
+multiclass VOPC_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
- bits<9> SRC0;
- bits<8> VSRC1;
+class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_32:$dst),
+ (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
- let Inst{8-0} = SRC0;
- let Inst{16-9} = VSRC1;
- let Inst{24-17} = op;
- let Inst{31-25} = 0x3e;
-
- let EncodingType = 15; //SIInstrEncodingType::VOPC
- let PostEncoderMethod = "VOPPostEncode";
- let DisableEncoding = "$dst";
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
+class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+ op, (outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+>, VOP <opName>;
-} // End Uses = [EXEC]
+//===----------------------------------------------------------------------===//
+// Vector I/O classes
+//===----------------------------------------------------------------------===//
-class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
- (outs VReg_128:$vdata),
- (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
- GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
- asm,
+ (outs),
+ (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
+ GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
- let mayLoad = 1;
- let mayStore = 0;
+ let mayStore = 1;
+ let mayLoad = 0;
}
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
@@ -530,8 +289,9 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
- i1imm:$tfe, SReg_32:$soffset),
- asm,
+ i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+ #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
let mayLoad = 1;
let mayStore = 0;
@@ -542,48 +302,38 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
(outs regClass:$dst),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
- i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
- asm,
+ i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
+ asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
+ #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
let mayLoad = 1;
let mayStore = 0;
}
-class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
+class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
op,
- (outs),
- (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
- GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
- asm,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr,
+ GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
[]> {
- let mayStore = 1;
- let mayLoad = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
}
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
- ValueType vt> {
- def _IMM : SMRD <
- op,
- (outs dstClass:$dst),
- (ins SMRDmemri:$src0),
- asm,
- [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
- >;
-
- def _SGPR : SMRD <
- op,
- (outs dstClass:$dst),
- (ins SMRDmemrr:$src0),
- asm,
- [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
- >;
-}
+//===----------------------------------------------------------------------===//
+// Vector instruction mappings
+//===----------------------------------------------------------------------===//
-multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
- defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
- defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
+// Maps an opcode in e32 form to its e64 equivalent
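+// Rows are keyed by the OpName field set by the VOP class; columns are keyed
+// by the encoded Size (4 bytes for the e32 form, 8 bytes for the e64 form).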
+def getVOPe64 : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
}
-include "SIInstrFormats.td"
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 005be96..af116f0 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -11,16 +11,31 @@
// that are not yet supported remain commented out.
//===----------------------------------------------------------------------===//
+class InterpSlots {
+  int P0 = 2;
+  int P10 = 0;
+  int P20 = 1;
+}
+def INTERP : InterpSlots;
+
+def InterpSlot : Operand<i32> {
+ let PrintMethod = "printInterpSlot";
+}
+
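The InterpSlot operand above delegates printing to a printInterpSlot hook. A simplified stand-in is sketched below, using the InterpSlots encodings (P10 = 0, P20 = 1, P0 = 2); the real hook lives in the target InstPrinter and operates on MCInst operands, so treat this only as an illustration of the mapping.

    #include <ostream>

    // Prints the interpolation slot name for the immediate operand encoding.
    void printInterpSlot(unsigned Imm, std::ostream &OS) {
      switch (Imm) {
      case 0:  OS << "P10"; break;
      case 1:  OS << "P20"; break;
      case 2:  OS << "P0";  break;
      default: OS << Imm;   break;  // unknown encoding: print the raw value
      }
    }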
def isSI : Predicate<"Subtarget.device()"
"->getGeneration() == AMDGPUDeviceInfo::HD7XXX">;
let Predicates = [isSI] in {
let neverHasSideEffects = 1 in {
+
+let isMoveImm = 1 in {
def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>;
def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>;
def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
+} // End isMoveImm = 1
+
def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
@@ -28,6 +43,7 @@ def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
} // End neverHasSideEffects = 1
+
////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
@@ -96,6 +112,7 @@ def S_CMPK_EQ_I32 : SOPK <
>;
*/
+let isCompare = 1 in {
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
@@ -107,6 +124,8 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
+} // End isCompare = 1
+
def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
@@ -116,286 +135,262 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
//def EXP : EXP_ <0x00000000, "EXP", []>;
-defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
- (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
- (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
- (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
- (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
- (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
- (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>;
-defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>;
-defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>;
-defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>;
-defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>;
-def : Pat <
- (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
- (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>;
-defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>;
-
-//Side effect is writing to EXEC
-let hasSideEffects = 1 in {
-
-defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>;
-defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>;
-defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>;
-defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>;
-defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>;
-defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>;
-defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>;
-defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>;
-defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>;
-defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>;
-defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>;
-defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>;
-defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>;
-defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>;
-defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>;
-defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>;
-
-} // End hasSideEffects = 1
-
-defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>;
-defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>;
-defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>;
-defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>;
-defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>;
-defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>;
-defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>;
-defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>;
-
-//Side effect is writing to EXEC
-let hasSideEffects = 1 in {
-
-defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>;
-defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>;
-defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>;
-defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>;
-defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>;
-defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>;
-defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>;
-defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>;
-defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>;
-defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>;
-defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>;
-defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>;
-defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>;
-defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>;
-defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>;
-defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>;
-
-} // End hasSideEffects = 1
-
-defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>;
-defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>;
-defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>;
-defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>;
-defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>;
-defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>;
-defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>;
-defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>;
-defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>;
-defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>;
-defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>;
-defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>;
-defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>;
-defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>;
-defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>;
-defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>;
-defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>;
-defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>;
-defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>;
-defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>;
-defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>;
-defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>;
-defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>;
-defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>;
-defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>;
-defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>;
-defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>;
-defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>;
-defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>;
-defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>;
-defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>;
-defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>;
-defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>;
-defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>;
-defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>;
-defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>;
-defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>;
-defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>;
-defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>;
-defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>;
-defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>;
-defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>;
-defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>;
-defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>;
-defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>;
-defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>;
-defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>;
-defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>;
-defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>;
-defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>;
-defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>;
-defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>;
-defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>;
-defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>;
-defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>;
-defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>;
-defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>;
-defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64", []>;
-defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>;
-defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>;
-defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>;
-defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>;
-defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>;
-defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>;
-defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)),
- (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)),
- (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)),
- (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)),
- (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)),
- (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>;
-def : Pat <
- (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)),
- (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1)
->;
-defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>;
-
-let hasSideEffects = 1 in {
+let isCompare = 1 in {
+
+defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
+defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
+defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
+defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
+defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64">;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64">;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64">;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64">;
+defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64">;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
+defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
+defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
+defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64">;
+defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
+defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
+defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
+defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32">;
+defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32">;
+defm V_CMPS_GT_F32 : VOPC_32 <0x00000044, "V_CMPS_GT_F32">;
+defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32">;
+defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32">;
+defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32">;
+defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32">;
+defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32">;
+defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32">;
+defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32">;
+defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32">;
+defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
+defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
+defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
+defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
+defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64">;
+defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64">;
+defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64">;
+defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64">;
+defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64">;
+defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64">;
+defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64">;
+defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64">;
+defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64">;
+defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64">;
+defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64">;
+defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64">;
+defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64">;
+defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64">;
+defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64">;
+defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64">;
+defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64">;
+defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64">;
+defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64">;
+defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64">;
+defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64">;
+defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64">;
+defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, "V_CMPSX_NGE_F64">;
+defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64">;
+defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64">;
+defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64">;
+defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64">;
+defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64">;
+defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+
+defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
+
+let hasSideEffects = 1, Defs = [EXEC] in {
+defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1, Defs = [EXEC]
+
+} // End isCompare = 1
-defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>;
-defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>;
-defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>;
-defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>;
-defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>;
-defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>;
-defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>;
-defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>;
-
-} // End hasSideEffects
-
-defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>;
-defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>;
-
-let hasSideEffects = 1 in {
-
-defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>;
-defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>;
-defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>;
-defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>;
-defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>;
-defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>;
-defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>;
-defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>;
-
-} // End hasSideEffects
-
-defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", []>;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>;
-defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>;
-
-let hasSideEffects = 1 in {
-
-defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>;
-defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>;
-defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>;
-defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>;
-defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>;
-defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>;
-defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>;
-defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>;
-
-} // End hasSideEffects
-
-defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>;
-defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>;
-defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>;
-defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>;
-defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>;
-defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>;
-defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>;
-defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>;
-defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>;
-defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>;
-defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>;
-defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>;
-defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>;
-defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>;
@@ -461,11 +456,13 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
-defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>;
+let mayLoad = 1 in {
+
+defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
@@ -473,6 +470,8 @@ defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32
//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+} // mayLoad = 1
+
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
@@ -511,12 +510,12 @@ def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>;
+def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>;
-//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>;
+def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
@@ -572,19 +571,21 @@ def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
-let neverHasSideEffects = 1 in {
+
+let neverHasSideEffects = 1, isMoveImm = 1 in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
-} // End neverHasSideEffects
+} // End neverHasSideEffects = 1, isMoveImm = 1
+
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
- [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))]
+ [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
>;
//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))]
+ [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
@@ -601,31 +602,35 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
- [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))]
+ [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
-defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>;
+defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
+ [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+>;
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
- [(set VReg_32:$dst, (frint AllReg_32:$src0))]
+ [(set VReg_32:$dst, (frint VSrc_32:$src0))]
>;
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
- [(set VReg_32:$dst, (ffloor AllReg_32:$src0))]
+ [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
>;
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
- [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))]
+ [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
>;
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
-defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>;
+defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
+ [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+>;
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))]
+ [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))]
+ [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
>;
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
@@ -655,7 +660,7 @@ def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
(ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P1_F32",
+ "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
[]> {
let DisableEncoding = "$m0";
}
@@ -664,7 +669,7 @@ def V_INTERP_P2_F32 : VINTRP <
0x00000001,
(outs VReg_32:$dst),
(ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_P2_F32",
+ "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
[]> {
let Constraints = "$src0 = $dst";
@@ -675,10 +680,9 @@ def V_INTERP_P2_F32 : VINTRP <
def V_INTERP_MOV_F32 : VINTRP <
0x00000002,
(outs VReg_32:$dst),
- (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
- "V_INTERP_MOV_F32",
+ (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
+ "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
[]> {
- let VSRC = 0;
let DisableEncoding = "$m0";
}
@@ -695,7 +699,7 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
let isBranch = 1 in {
def S_BRANCH : SOPP <
- 0x00000002, (ins brtarget:$target), "S_BRANCH",
+ 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
[(br bb:$target)]> {
let isBarrier = 1;
}
@@ -703,35 +707,35 @@ def S_BRANCH : SOPP <
let DisableEncoding = "$scc" in {
def S_CBRANCH_SCC0 : SOPP <
0x00000004, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC0", []
+ "S_CBRANCH_SCC0 $target", []
>;
def S_CBRANCH_SCC1 : SOPP <
0x00000005, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC1",
+ "S_CBRANCH_SCC1 $target",
[]
>;
} // End DisableEncoding = "$scc"
def S_CBRANCH_VCCZ : SOPP <
0x00000006, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCZ",
+ "S_CBRANCH_VCCZ $target",
[]
>;
def S_CBRANCH_VCCNZ : SOPP <
0x00000007, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ",
+ "S_CBRANCH_VCCNZ $target",
[]
>;
let DisableEncoding = "$exec" in {
def S_CBRANCH_EXECZ : SOPP <
0x00000008, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECZ",
+ "S_CBRANCH_EXECZ $target",
[]
>;
def S_CBRANCH_EXECNZ : SOPP <
0x00000009, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECNZ",
+ "S_CBRANCH_EXECNZ $target",
[]
>;
} // End DisableEncoding = "$exec"
@@ -758,80 +762,101 @@ def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
- (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32",
+ (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
+ "V_CNDMASK_B32_e32 $dst, $src0, $src1, [$vcc]",
[]
>{
let DisableEncoding = "$vcc";
}
def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- "V_CNDMASK_B32_e64",
- [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))]
+ (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
+ "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
+ [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
+ VSrc_32:$src1, VSrc_32:$src0))]
>;
//f32 pattern for V_CNDMASK_B32_e64
def : Pat <
- (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)),
- (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2)
+ (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
+ (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
>;
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
-defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>;
-def : Pat <
- (f32 (fadd AllReg_32:$src0, VReg_32:$src1)),
- (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
+ [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
>;
+} // End isCommutable = 1
-defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>;
-def : Pat <
- (f32 (fsub AllReg_32:$src0, VReg_32:$src1)),
- (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1)
+defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
+ [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
>;
+
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>;
defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>;
+
+let isCommutable = 1 in {
+
defm V_MUL_LEGACY_F32 : VOP2_32 <
0x00000007, "V_MUL_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
>;
defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
- [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
>;
+
+} // End isCommutable = 1
+
//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>;
//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
+
+let isCommutable = 1 in {
+
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
>;
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
>;
+
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+
+} // End isCommutable = 1
+
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
+
+let isCommutable = 1 in {
+
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
>;
+
+} // End isCommutable = 1
+
defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>;
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
@@ -840,23 +865,30 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let Defs = [VCC] in { // Carry-out goes to VCC
-defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32",
- [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+
+let isCommutable = 1 in {
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>;
-defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32",
- [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))]
+} // End isCommutable = 1
+
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>;
+
+defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>;
+let Uses = [VCC] in { // Carry-in comes from VCC
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
+defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>;
+} // End Uses = [VCC]
} // End Defs = [VCC]
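For context, the Defs/Uses of VCC above describe the usual 32-bit carry chain: V_ADD_I32 produces the low word and a carry-out in VCC, and V_ADDC_U32 adds the high words plus that carry-in. A host-side illustration of the computation (names are made up):

    #include <cstdint>

    // 64-bit add decomposed the way the carry-chain instructions model it.
    uint64_t add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      uint32_t Lo = ALo + BLo;
      uint32_t Carry = Lo < ALo ? 1u : 0u;   // carry-out -> VCC (V_ADD_I32)
      uint32_t Hi = AHi + BHi + Carry;       // carry-in  <- VCC (V_ADDC_U32)
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }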
-defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>;
-defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>;
-defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>;
-defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>;
defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))]
+ [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
@@ -926,6 +958,10 @@ def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+def : Pat <
+ (mul VSrc_32:$src0, VReg_32:$src1),
+ (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+>;
def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
@@ -949,27 +985,35 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))]
+ [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
+ SReg_32:$src0, SReg_32:$src1))]
>;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
// f32 pattern for S_CSELECT_B32
def : Pat <
- (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)),
+ (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
(S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
>;
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
- [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))]
+ [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
>;
-def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64",
- [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))]
+
+def : Pat <
+ (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
+ (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
>;
+
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
+def : Pat <
+ (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
+ (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
@@ -998,54 +1042,12 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI <
- (outs VReg_32:$dst),
- (ins immType:$src0),
- "V_MOV_IMM",
- [(set VReg_32:$dst, (immNode:$src0))]
->;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>;
-def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>;
-
-def S_MOV_IMM_I32 : InstSI <
- (outs SReg_32:$dst),
- (ins i32imm:$src0),
- "S_MOV_IMM_I32",
- [(set SReg_32:$dst, (imm:$src0))]
->;
-
-// i64 immediates aren't really supported in hardware, but LLVM will use the i64
-// type for indices on load and store instructions. The pattern for
-// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits,
-// which the hardware can handle.
-def S_MOV_IMM_I64 : InstSI <
- (outs SReg_64:$dst),
- (ins i64imm:$src0),
- "S_MOV_IMM_I64 $dst, $src0",
- [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))]
->;
-
-} // End isCodeGenOnly, isPseudo = 1
-
-class SI_LOAD_LITERAL<Operand ImmType> :
- Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> {
-
- bits<32> imm;
- let Inst{31-0} = imm;
-}
-
-def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>;
-def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>;
-
let isCodeGenOnly = 1, isPseudo = 1 in {
def SET_M0 : InstSI <
(outs SReg_32:$dst),
(ins i32imm:$src0),
- "SET_M0",
+ "SET_M0 $dst, $src0",
[(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
>;
@@ -1058,13 +1060,6 @@ def LOAD_CONST : AMDGPUShaderInst <
let usesCustomInserter = 1 in {
-def SI_V_CNDLT : InstSI <
- (outs VReg_32:$dst),
- (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
- "SI_V_CNDLT $dst, $src0, $src1, $src2",
- [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))]
->;
-
def SI_INTERP : InstSI <
(outs VReg_32:$dst),
(ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
@@ -1072,21 +1067,6 @@ def SI_INTERP : InstSI <
[]
>;
-def SI_INTERP_CONST : InstSI <
- (outs VReg_32:$dst),
- (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
- "SI_INTERP_CONST $dst, $attr_chan, $attr, $params",
- [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan,
- imm:$attr, SReg_32:$params))]
->;
-
-def SI_KIL : InstSI <
- (outs),
- (ins VReg_32:$src),
- "SI_KIL $src",
- [(int_AMDGPU_kill VReg_32:$src)]
->;
-
def SI_WQM : InstSI <
(outs),
(ins),
@@ -1106,15 +1086,15 @@ let isBranch = 1, isTerminator = 1 in {
def SI_IF : InstSI <
(outs SReg_64:$dst),
- (ins SReg_1:$vcc, brtarget:$target),
- "SI_IF",
- [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))]
+ (ins SReg_64:$vcc, brtarget:$target),
+ "SI_IF $dst, $vcc, $target",
+ [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
>;
def SI_ELSE : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
- "SI_ELSE",
+ "SI_ELSE $dst, $src, $target",
[(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
let Constraints = "$src = $dst";
@@ -1123,7 +1103,7 @@ def SI_ELSE : InstSI <
def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
- "SI_LOOP",
+ "SI_LOOP $saved, $target",
[(int_SI_loop SReg_64:$saved, bb:$target)]
>;
@@ -1132,43 +1112,60 @@ def SI_LOOP : InstSI <
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
- "SI_ELSE",
+ "SI_ELSE $dst, $src",
[(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
- (ins SReg_1:$vcc, SReg_64:$src),
- "SI_IF_BREAK",
- [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))]
+ (ins SReg_64:$vcc, SReg_64:$src),
+ "SI_IF_BREAK $dst, $vcc, $src",
+ [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
- "SI_ELSE_BREAK",
+ "SI_ELSE_BREAK $dst, $src0, $src1",
[(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
- "SI_END_CF",
+ "SI_END_CF $saved",
[(int_SI_end_cf SReg_64:$saved)]
>;
+def SI_KILL : InstSI <
+ (outs),
+ (ins VReg_32:$src),
+ "SI_KIL $src",
+ [(int_AMDGPU_kill VReg_32:$src)]
+>;
+
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
} // end IsCodeGenOnly, isPseudo
+def : Pat<
+ (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
+ (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+>;
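Reading the cndlt expansion above: the intrinsic selects its second or third operand depending on whether the first is negative, so the pattern builds the mask with V_CMP_GT_F32 against 0 and feeds it to V_CNDMASK_B32. A scalar sketch of the assumed semantics:

    // cndlt(s0, s1, s2): select s1 when s0 is negative, otherwise s2.
    float cndlt(float S0, float S1, float S2) {
      return (0.0f > S0) ? S1 : S2;   // V_CMP_GT_F32 mask drives V_CNDMASK_B32
    }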
+
+def : Pat <
+ (int_AMDGPU_kilp),
+ (SI_KILL (V_MOV_B32_e32 0xbf800000))
+>;
+
/* int_SI_vs_load_input */
def : Pat<
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
VReg_32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
- 0, 0, (i32 SREG_LIT_0))
+ 0, 0, 0)
>;
/* int_SI_export */
@@ -1179,43 +1176,101 @@ def : Pat <
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
>;
-/* int_SI_sample */
-def : Pat <
- (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
- (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
- SReg_256:$rsrc, SReg_128:$sampler)
->;
-/* int_SI_sample_lod */
+/* int_SI_sample for simple 1D texture lookup */
def : Pat <
- (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
- (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
- SReg_256:$rsrc, SReg_128:$sampler)
+ (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
+ (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)),
+ SReg_256:$rsrc, SReg_128:$sampler)
>;
-/* int_SI_sample_bias */
-def : Pat <
- (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler),
- (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord,
- SReg_256:$rsrc, SReg_128:$sampler)
->;
+class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, imm),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
+ (EXTRACT_SUBREG addr_class:$addr, sub0),
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
+ (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0,
+ (EXTRACT_SUBREG addr_class:$addr, sub0),
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
+ ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
+ (EXTRACT_SUBREG addr_class:$addr, sub0),
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
+ (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0,
+ (EXTRACT_SUBREG addr_class:$addr, sub0),
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+ RegisterClass addr_class, ValueType addr_type> : Pat <
+ (name imm:$writemask, (addr_type addr_class:$addr),
+ SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
+ (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0,
+ (EXTRACT_SUBREG addr_class:$addr, sub0),
+ SReg_256:$rsrc, SReg_128:$sampler)
+>;
+
+/* int_SI_sample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
+ def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
+
+ def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
+ def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+}
-def CLAMP_SI : CLAMP<VReg_32>;
-def FABS_SI : FABS<VReg_32>;
-def FNEG_SI : FNEG<VReg_32>;
+defm : SamplePatterns<VReg_64, v2i32>;
+defm : SamplePatterns<VReg_128, v4i32>;
+defm : SamplePatterns<VReg_256, v8i32>;
+defm : SamplePatterns<VReg_512, v16i32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
+def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
+def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
+def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
+def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
+def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
+def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
+def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1223,24 +1278,68 @@ def : BitConvert <i32, f32, VReg_32>;
def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <f32, i32, VReg_32>;
+/********** =================== **********/
+/********** Src & Dst modifiers **********/
+/********** =================== **********/
+
+def : Pat <
+ (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fabs VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+>;
+
+def : Pat <
+ (fneg VReg_32:$src),
+ (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
+>;
+
+/********** ================== **********/
+/********** Immediate Patterns **********/
+/********** ================== **********/
+
+def : Pat <
+ (i1 imm:$imm),
+ (S_MOV_B64 imm:$imm)
+>;
+
def : Pat <
- (i64 (SIsreg1_bitcast SReg_1:$vcc)),
- (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64))
+ (i32 imm:$imm),
+ (V_MOV_B32_e32 imm:$imm)
>;
def : Pat <
- (i1 (SIsreg1_bitcast SReg_64:$vcc)),
- (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1)
+ (f32 fpimm:$imm),
+ (V_MOV_B32_e32 fpimm:$imm)
>;
def : Pat <
- (i64 (SIvcc_bitcast VCCReg:$vcc)),
- (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64))
+ (i32 imm:$imm),
+ (S_MOV_B32 imm:$imm)
>;
def : Pat <
- (i1 (SIvcc_bitcast SReg_64:$vcc)),
- (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
+ (f32 fpimm:$imm),
+ (S_MOV_B32 fpimm:$imm)
+>;
+
+def : Pat <
+ (i64 InlineImm<i64>:$imm),
+ (S_MOV_B64 InlineImm<i64>:$imm)
+>;
+
+// i64 immediates aren't supported in hardware; split them into two 32-bit values.
+def : Pat <
+ (i64 imm:$imm),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
+ (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
>;
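A minimal standalone C++ sketch, not part of this patch, of the lo/hi split the pattern above performs, assuming LO32 and HI32 extract the low and high 32 bits as their names suggest; each half then goes through S_MOV_B32 and INSERT_SUBREG into sub0 and sub1.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 0x123456789abcdef0ULL;           // example 64-bit immediate
  uint32_t Lo = static_cast<uint32_t>(Imm);       // LO32: ends up in sub0
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // HI32: ends up in sub1
  std::printf("lo = 0x%08x, hi = 0x%08x\n", Lo, Hi);
  return 0;
}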
/********** ===================== **********/
@@ -1248,6 +1347,12 @@ def : Pat <
/********** ===================== **********/
def : Pat <
+ (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr,
+ (S_MOV_B32 SReg_32:$params))
+>;
+
+def : Pat <
(int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
(SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
imm:$attr, SReg_32:$params)
@@ -1305,47 +1410,86 @@ def : Pat <
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>;
def : Pat <
- (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1),
- (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1))
+ (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
>;
def : Pat<
- (fdiv AllReg_32:$src0, AllReg_32:$src1),
- (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
+ (fdiv VSrc_32:$src0, VSrc_32:$src1),
+ (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
>;
def : Pat <
- (int_AMDGPU_kilp),
- (SI_KIL (V_MOV_IMM_I32 0xbf800000))
+ (fcos VSrc_32:$src0),
+ (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+>;
+
+def : Pat <
+ (fsin VSrc_32:$src0),
+ (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>;
def : Pat <
(int_AMDGPU_cube VReg_128:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_x),
- (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_y),
- (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_z),
- (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
- (EXTRACT_SUBREG VReg_128:$src, sel_y),
- (EXTRACT_SUBREG VReg_128:$src, sel_z),
- 0, 0, 0, 0), sel_w)
+ (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
+ (EXTRACT_SUBREG VReg_128:$src, sub1),
+ (EXTRACT_SUBREG VReg_128:$src, sub2),
+ 0, 0, 0, 0), sub3)
+>;
+
+def : Pat <
+ (i32 (sext (i1 SReg_64:$src0))),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
>;
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
-def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)),
- (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2,
+def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
+ (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
0, 0, 0, 0)>;
+/********** ================== **********/
+/********** SMRD Patterns **********/
+/********** ================== **********/
+
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+ // 1. Offset as an 8-bit DWORD immediate
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
+ (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+ >;
+
+ // 2. Offset loaded in a 32-bit SGPR
+ def : Pat <
+ (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
+ (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load SReg_64:$sbase),
+ (vt (Instr_IMM SReg_64:$sbase, 0))
+ >;
+}
+
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+
} // End isSI predicate
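A standalone C++ sketch, not part of this patch, of how a constant byte offset could map onto the three SMRD_Pattern forms above; it assumes IMM8bitDWORD means a dword-aligned offset whose dword count fits in 8 bits, and selectSMRDForm is a hypothetical helper, not an LLVM API.

#include <cstdio>
#include <initializer_list>

const char *selectSMRDForm(unsigned ByteOffset) {
  if (ByteOffset == 0)
    return "form 3: no offset";
  if (ByteOffset % 4 == 0 && ByteOffset / 4 < 256)
    return "form 1: 8-bit dword immediate";
  return "form 2: offset materialized into an SGPR via S_MOV_B32";
}

int main() {
  for (unsigned Off : {0u, 16u, 1020u, 4096u})
    std::printf("offset %u -> %s\n", Off, selectSMRDForm(Off));
  return 0;
}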
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index c322fef..611b9c4 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -22,9 +22,11 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
def int_SI_wqm : Intrinsic <[], [], []>;
- def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
- def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
- def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>;
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>;
+
+ def int_SI_sample : Sample;
+ def int_SI_sampleb : Sample;
+ def int_SI_samplel : Sample;
/* Interpolation Intrinsics */
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 3fbe653..b215aa2 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -68,7 +68,10 @@ private:
static char ID;
const TargetInstrInfo *TII;
- void Skip(MachineInstr &MI, MachineOperand &To);
+ bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void Skip(MachineInstr &From, MachineOperand &To);
+ void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
void Else(MachineInstr &MI);
@@ -78,6 +81,7 @@ private:
void Loop(MachineInstr &MI);
void EndCf(MachineInstr &MI);
+ void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
public:
@@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm);
}
-void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+
unsigned NumInstr = 0;
- for (MachineBasicBlock *MBB = *From.getParent()->succ_begin();
- NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
+ for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
MBB = *MBB->succ_begin()) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
NumInstr < SkipThreshold && I != E; ++I) {
if (I->isBundle() || !I->isBundled())
- ++NumInstr;
+ if (++NumInstr >= SkipThreshold)
+ return true;
}
}
- if (NumInstr < SkipThreshold)
+ return false;
+}
+
+void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
+
+ if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
return;
DebugLoc DL = From.getDebugLoc();
@@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
.addReg(AMDGPU::EXEC);
}
+void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+ return;
+
+ MachineBasicBlock::iterator Insert = &MI;
+ ++Insert;
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(3)
+ .addReg(AMDGPU::EXEC);
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
+ .addImm(0)
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addImm(0)
+ .addImm(1)
+ .addImm(1)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0)
+ .addReg(AMDGPU::VGPR0);
+
+ // ... and terminate wavefront
+ BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+}
+
void SILowerControlFlowPass::If(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -242,8 +285,27 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
assert(0);
}
+void SILowerControlFlowPass::Kill(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Kill is only allowed in pixel shaders
+ assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+ ShaderType::PIXEL);
+
+ // Clear this pixel from the exec mask if the operand is negative
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ .addImm(0)
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
- bool HaveCf = false;
+
+ bool HaveKill = false;
+ unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -257,6 +319,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
+ ++Depth;
If(MI);
break;
@@ -277,14 +340,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::SI_LOOP:
+ ++Depth;
Loop(MI);
break;
case AMDGPU::SI_END_CF:
- HaveCf = true;
+ if (--Depth == 0 && HaveKill) {
+ SkipIfDead(MI);
+ HaveKill = false;
+ }
EndCf(MI);
break;
+ case AMDGPU::SI_KILL:
+ if (Depth == 0)
+ SkipIfDead(MI);
+ else
+ HaveKill = true;
+ Kill(MI);
+ break;
+
case AMDGPU::S_BRANCH:
Branch(MI);
break;
@@ -292,40 +367,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // TODO: What is this good for?
- unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
- if (HaveCf && ShaderType == ShaderType::PIXEL) {
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- if (MBB.succ_empty()) {
-
- MachineInstr &MI = *MBB.getFirstNonPHI();
- DebugLoc DL = MI.getDebugLoc();
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0)
- .addReg(AMDGPU::SREG_LIT_0);
-
- // ... and terminate wavefront
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
- }
- }
- }
-
return true;
}
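A rough standalone C++ sketch, not part of this patch, of the Depth/HaveKill bookkeeping the new runOnMachineFunction loop uses: a kill at the top level gets the skip-if-dead sequence immediately, while a kill inside structured control flow defers it until the outermost SI_END_CF. The opcode strings stand in for real MachineInstrs.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Hypothetical opcode stream for one machine function.
  std::vector<std::string> Ops = {"SI_IF", "SI_KILL", "SI_END_CF", "SI_KILL"};
  unsigned Depth = 0;
  bool HaveKill = false;

  for (const std::string &Op : Ops) {
    if (Op == "SI_IF" || Op == "SI_LOOP") {
      ++Depth;
    } else if (Op == "SI_END_CF") {
      if (--Depth == 0 && HaveKill) {
        std::printf("insert skip-if-dead before this END_CF\n");
        HaveKill = false;
      }
    } else if (Op == "SI_KILL") {
      if (Depth == 0)
        std::printf("insert skip-if-dead right after this KILL\n");
      else
        HaveKill = true;  // defer until the outermost control flow closes
    }
  }
  return 0;
}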
diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp
deleted file mode 100644
index c0411e9..0000000
--- a/lib/Target/R600/SILowerLiteralConstants.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass performs the following transformation on instructions with
-/// literal constants:
-///
-/// %VGPR0 = V_MOV_IMM_I32 1
-///
-/// becomes:
-///
-/// BUNDLE
-/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
-/// * SI_LOAD_LITERAL 1
-///
-/// The resulting sequence matches exactly how the hardware handles immediate
-/// operands, so this transformation greatly simplifies the code generator.
-///
-/// Only the *_MOV_IMM_* support immediate operands at the moment, but when
-/// support for immediate operands is added to other instructions, they
-/// will be lowered here as well.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-
-using namespace llvm;
-
-namespace {
-
-class SILowerLiteralConstantsPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const TargetInstrInfo *TII;
-
-public:
- SILowerLiteralConstantsPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const {
- return "SI Lower literal constants pass";
- }
-};
-
-} // End anonymous namespace
-
-char SILowerLiteralConstantsPass::ID = 0;
-
-FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
- return new SILowerLiteralConstantsPass(tm);
-}
-
-bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
- Next = llvm::next(I);
- MachineInstr &MI = *I;
- switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::S_MOV_IMM_I32:
- case AMDGPU::S_MOV_IMM_I64:
- case AMDGPU::V_MOV_IMM_F32:
- case AMDGPU::V_MOV_IMM_I32: {
- unsigned MovOpcode;
- unsigned LoadLiteralOpcode;
- MachineOperand LiteralOp = MI.getOperand(1);
- if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
- MovOpcode = AMDGPU::V_MOV_B32_e32;
- } else {
- MovOpcode = AMDGPU::S_MOV_B32;
- }
- if (LiteralOp.isImm()) {
- LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
- } else {
- LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
- }
- MIBundleBuilder Bundle(MBB, I);
- Bundle
- .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode),
- MI.getOperand(0).getReg())
- .addReg(AMDGPU::SI_LITERAL_CONSTANT))
- .append(BuildMI(MF, MBB.findDebugLoc(I),
- TII->get(LoadLiteralOpcode))
- .addOperand(MI.getOperand(1)));
- llvm::finalizeBundle(MBB, Bundle.begin());
- MI.eraseFromParent();
- break;
- }
- }
- }
- }
- return false;
-}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index c3f1361..9e04e24 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -1,44 +1,40 @@
-
-let Namespace = "AMDGPU" in {
- def low : SubRegIndex;
- def high : SubRegIndex;
-
- def sub0 : SubRegIndex;
- def sub1 : SubRegIndex;
- def sub2 : SubRegIndex;
- def sub3 : SubRegIndex;
- def sub4 : SubRegIndex;
- def sub5 : SubRegIndex;
- def sub6 : SubRegIndex;
- def sub7 : SubRegIndex;
-}
+//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the SI registers
+//===----------------------------------------------------------------------===//
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
}
-class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices = [low, high];
- let HWEncoding = encoding;
-}
-
-class SGPR_32 <bits<16> num, string name> : SIReg<name, num>;
-
-class VGPR_32 <bits<16> num, string name> : SIReg<name, num>;
-
// Special Registers
def VCC : SIReg<"VCC", 106>;
-def EXEC_LO : SIReg <"EXEC LO", 126>;
-def EXEC_HI : SIReg <"EXEC HI", 127>;
-def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>;
+def EXEC : SIReg<"EXEC", 126>;
def SCC : SIReg<"SCC", 253>;
-def SREG_LIT_0 : SIReg <"S LIT 0", 128>;
-def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>;
def M0 : SIReg <"M0", 124>;
-//Interpolation registers
+// SGPR registers
+foreach Index = 0-101 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+}
+
+// VGPR registers
+foreach Index = 0-255 in {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+// Virtual interpolation registers
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
@@ -64,73 +60,150 @@ def ANCILLARY : SIReg <"ANCILLARY">;
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
-// SGPR 32-bit registers
-foreach Index = 0-101 in {
- def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>;
-}
+//===----------------------------------------------------------------------===//
+// Groupings using register classes and tuples
+//===----------------------------------------------------------------------===//
+// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
-def SGPR_64 : RegisterTuples<[low, high],
- [(add (decimate SGPR_32, 2)),
- (add(decimate (rotl SGPR_32, 1), 2))]>;
+def SGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (decimate (trunc SGPR_32, 101), 2)),
+ (add (decimate (shl SGPR_32, 1), 2))]>;
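A standalone C++ sketch, not part of this patch, of the pairing the SGPR_64 tuple above produces, assuming the usual TableGen set operators: trunc keeps the first 101 registers, decimate keeps every second one, and shl drops the first N, so sub0 runs over the even SGPRs and sub1 over the odd ones that follow.

#include <cstdio>

int main() {
  // Enumerate the 64-bit SGPR tuples: (SGPR0,SGPR1), (SGPR2,SGPR3), ...
  for (unsigned N = 0; N + 1 <= 101; N += 2)
    std::printf("sub0 = SGPR%u, sub1 = SGPR%u\n", N, N + 1);
  return 0;
}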
// SGPR 128-bit registers
-def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
- [(add (decimate SGPR_32, 4)),
- (add (decimate (rotl SGPR_32, 1), 4)),
- (add (decimate (rotl SGPR_32, 2), 4)),
- (add (decimate (rotl SGPR_32, 3), 4))]>;
+def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (decimate (trunc SGPR_32, 99), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (decimate SGPR_32, 8)),
- (add (decimate (rotl SGPR_32, 1), 8)),
- (add (decimate (rotl SGPR_32, 2), 8)),
- (add (decimate (rotl SGPR_32, 3), 8)),
- (add (decimate (rotl SGPR_32, 4), 8)),
- (add (decimate (rotl SGPR_32, 5), 8)),
- (add (decimate (rotl SGPR_32, 6), 8)),
- (add (decimate (rotl SGPR_32, 7), 8))]>;
+ [(add (decimate (trunc SGPR_32, 95), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4))]>;
+
+// SGPR 512-bit registers
+def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (decimate (trunc SGPR_32, 87), 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-foreach Index = 0-255 in {
- def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>;
-}
-
def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
-def VGPR_64 : RegisterTuples<[low, high],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1))]>;
+def VGPR_64 : RegisterTuples<[sub0, sub1],
+ [(add (trunc VGPR_32, 255)),
+ (add (shl VGPR_32, 1))]>;
// VGPR 128-bit registers
-def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
- [(add VGPR_32),
- (add (rotl VGPR_32, 1)),
- (add (rotl VGPR_32, 2)),
- (add (rotl VGPR_32, 3))]>;
+def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
+ [(add (trunc VGPR_32, 253)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3))]>;
+
+// VGPR 256-bit registers
+def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
+ [(add (trunc VGPR_32, 249)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7))]>;
+
+// VGPR 512-bit registers
+def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
+ [(add (trunc VGPR_32, 241)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15))]>;
+
+//===----------------------------------------------------------------------===//
+// Register classes used as source and destination
+//===----------------------------------------------------------------------===//
+
+// Special register classes for predicates and the M0 register
+def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
+def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
+def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
+def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
+ (add SGPR_32, M0Reg)
>;
-def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>;
-
-def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>;
+def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+ (add SGPR_64, VCCReg, EXECReg)
+>;
def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
+
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add VGPR_32,
+def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+
+def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+
+//===----------------------------------------------------------------------===//
+// [SV]Src_* register classes: these can hold either an immediate or a register
+//===----------------------------------------------------------------------===//
+
+def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
+
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
+
+def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+ (add VReg_32, SReg_32,
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
PERSP_CENTER_I, PERSP_CENTER_J,
PERSP_CENTROID_I, PERSP_CENTROID_J,
@@ -147,21 +220,8 @@ def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
ANCILLARY,
SAMPLE_COVERAGE,
POS_FIXED_PT
- )
+ )
>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>;
-
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>;
-
-// AllReg_* - A set of all scalar and vector registers of a given width.
-def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>;
-
-def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>;
-
-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
-def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
-def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index f5e10fc..3d4bfdc 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -21,8 +21,9 @@ void SparcELFMCAsmInfo::anchor() { }
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::sparcv9)
- PointerSize = 8;
+ if (TheTriple.getArch() == Triple::sparcv9) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 6c47c70..a0dae6e 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -67,6 +67,22 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+void SparcFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+
void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 6b593c9..464233e 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -32,6 +32,10 @@ public:
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const { return false; }
};
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 168640f..138b92d 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -95,15 +95,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (MF.getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ // Make room for the return address offset.
+ RetOps.push_back(SDValue());
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -115,6 +110,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
@@ -127,18 +123,19 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
- if (MF.getRegInfo().liveout_empty())
- MF.getRegInfo().addLiveOut(SP::I0);
+ RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
- SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+ RetOps[0] = Chain; // Update chain.
+ RetOps[1] = DAG.getConstant(RetAddrOffset, MVT::i32);
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode, Flag);
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
- RetAddrOffsetNode);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
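A standalone C++ sketch, not part of this patch, of the operand list the rewritten LowerReturn assembles for the now-variadic RET_FLAG node: the chain first, then the return-address offset, then one register per return value, then the optional glue. The strings are placeholders for the actual SDValues.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> RetOps;
  RetOps.push_back("Chain");             // RetOps[0], updated after the copies
  RetOps.push_back("<retaddr offset>");  // RetOps[1], filled in below
  RetOps.push_back("Register(%i0)");     // one entry per return value location
  RetOps[1] = "Constant(8)";             // 8, or 12 when a struct is returned
  bool HaveGlue = true;                  // hypothetical: a glue value exists
  if (HaveGlue)
    RetOps.push_back("Glue");

  for (const std::string &Op : RetOps)
    std::printf("%s\n", Op.c_str());
  return 0;
}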
/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
@@ -759,10 +756,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index e64c140..90b698d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -126,7 +126,7 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9c1c30b..25e90b7 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -56,45 +56,27 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-void SparcRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- DebugLoc dl = MI.getDebugLoc();
- int Size = MI.getOperand(0).getImm();
- if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
- Size = -Size;
- if (Size)
- BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
- MBB.erase(I);
-}
-
void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(i+1).getImm();
+ MI.getOperand(FIOperandNum + 1).getImm();
// Replace frame index with a frame pointer reference.
if (Offset >= -4096 && Offset <= 4095) {
// If the offset is small enough to fit in the immediate field, directly
// encode it.
- MI.getOperand(i).ChangeToRegister(SP::I6, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
} else {
// Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
// scavenge a register here instead of reserving G1 all of the time.
@@ -104,8 +86,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
.addReg(SP::I6);
// Insert: G1+%lo(offset) into the user.
- MI.getOperand(i).ChangeToRegister(SP::G1, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9515ad3..357879b 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -36,12 +36,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ca438eb..b2c6d55 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -57,7 +57,7 @@ private:
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
- X86Operand *ParseIntelTypeOperator(SMLoc StartLoc);
+ X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
@@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
+ bool AddressOf;
union {
struct {
@@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand {
return OffsetOfLoc.getPointer();
}
+ bool needAddressOf() const {
+ return AddressOf;
+ }
+
bool needSizeDirective() const {
assert(Kind == Memory && "Invalid access!");
return Mem.NeedSizeDir;
@@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand {
}
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc()) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
+ Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
return Res;
}
@@ -488,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false){
+ unsigned Size = 0, bool NeedSizeDir = false) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -497,6 +504,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
@@ -520,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
Res->Mem.NeedSizeDir = NeedSizeDir;
+ Res->AddressOf = false;
return Res;
}
};
@@ -675,115 +684,299 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
+enum IntelBracExprState {
+ IBES_START,
+ IBES_LBRAC,
+ IBES_RBRAC,
+ IBES_REGISTER,
+ IBES_REGISTER_STAR,
+ IBES_REGISTER_STAR_INTEGER,
+ IBES_INTEGER,
+ IBES_INTEGER_STAR,
+ IBES_INDEX_REGISTER,
+ IBES_IDENTIFIER,
+ IBES_DISP_EXPR,
+ IBES_MINUS,
+ IBES_ERROR
+};
+
+class IntelBracExprStateMachine {
+ IntelBracExprState State;
+ unsigned BaseReg, IndexReg, Scale;
+ int64_t Disp;
+
+ unsigned TmpReg;
+ int64_t TmpInteger;
+
+ bool isPlus;
+
+public:
+ IntelBracExprStateMachine(MCAsmParser &parser) :
+ State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0),
+ TmpReg(0), TmpInteger(0), isPlus(true) {}
+
+ unsigned getBaseReg() { return BaseReg; }
+ unsigned getIndexReg() { return IndexReg; }
+ unsigned getScale() { return Scale; }
+ int64_t getDisp() { return Disp; }
+ bool isValidEndState() { return State == IBES_RBRAC; }
+
+ void onPlus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = true;
+ }
+ void onMinus() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_MINUS;
+ break;
+ case IBES_INTEGER:
+ State = IBES_START;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_START;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_START;
+ break;
+ }
+ isPlus = false;
+ }
+ void onRegister(unsigned Reg) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_REGISTER;
+ TmpReg = Reg;
+ break;
+ case IBES_INTEGER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = Reg;
+ Scale = TmpInteger;
+ break;
+ }
+ }
+ void onDispExpr() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_DISP_EXPR;
+ break;
+ }
+ }
+ void onInteger(int64_t TmpInt) {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_START:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_MINUS:
+ State = IBES_INTEGER;
+ TmpInteger = TmpInt;
+ break;
+ case IBES_REGISTER_STAR:
+ assert (!IndexReg && "IndexReg already set!");
+ State = IBES_INDEX_REGISTER;
+ IndexReg = TmpReg;
+ Scale = TmpInt;
+ break;
+ }
+ }
+ void onStar() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_INTEGER:
+ State = IBES_INTEGER_STAR;
+ break;
+ case IBES_REGISTER:
+ State = IBES_REGISTER_STAR;
+ break;
+ }
+ }
+ void onLBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_RBRAC:
+ State = IBES_START;
+ isPlus = true;
+ break;
+ }
+ }
+ void onRBrac() {
+ switch (State) {
+ default:
+ State = IBES_ERROR;
+ break;
+ case IBES_DISP_EXPR:
+ State = IBES_RBRAC;
+ break;
+ case IBES_INTEGER:
+ State = IBES_RBRAC;
+ if (isPlus)
+ Disp += TmpInteger;
+ else
+ Disp -= TmpInteger;
+ break;
+ case IBES_REGISTER:
+ State = IBES_RBRAC;
+ // If we already have a BaseReg, then assume this is the IndexReg with a
+ // scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ break;
+ case IBES_INDEX_REGISTER:
+ State = IBES_RBRAC;
+ break;
+ }
+ }
+};
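A standalone C++ sketch, not part of this patch, tracing the state machine above on the operand [eax + 4*ebx + 16]; the register ids are hypothetical placeholders, and the comments name the transitions the class would take.

#include <cstdio>

int main() {
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  long long Disp = 0;

  // onRegister(eax): IBES_START -> IBES_REGISTER        (TmpReg = eax)
  // onPlus():        IBES_REGISTER -> IBES_START        (BaseReg = eax)
  BaseReg = 100;                                         // placeholder id for EAX
  // onInteger(4):    IBES_START -> IBES_INTEGER         (TmpInteger = 4)
  // onStar():        IBES_INTEGER -> IBES_INTEGER_STAR
  // onRegister(ebx): IBES_INTEGER_STAR -> IBES_INDEX_REGISTER
  IndexReg = 101;                                        // placeholder id for EBX
  Scale = 4;
  // onPlus():        IBES_INDEX_REGISTER -> IBES_START
  // onInteger(16):   IBES_START -> IBES_INTEGER         (TmpInteger = 16)
  // onRBrac():       IBES_INTEGER -> IBES_RBRAC         (Disp += 16)
  Disp += 16;

  std::printf("base=%u index=%u scale=%u disp=%lld\n",
              BaseReg, IndexReg, Scale, Disp);
  return 0;
}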
+
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
- unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
const AsmToken &Tok = Parser.getTok();
SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
- const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
- // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
-
// Eat '['
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
+ unsigned TmpReg = 0;
+
+ // Try to handle '[' 'symbol' ']'
if (getLexer().is(AsmToken::Identifier)) {
- // Parse BaseReg
- if (ParseRegister(BaseReg, Start, End)) {
- // Handle '[' 'symbol' ']'
- if (getParser().ParseExpression(Disp, End)) return 0;
+ if (ParseRegister(TmpReg, Start, End)) {
+ const MCExpr *Disp;
+ if (getParser().parseExpression(Disp, End))
+ return 0;
+
if (getLexer().isNot(AsmToken::RBrac))
return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
- End = Parser.getTok().getEndLoc();
+ // Adjust the EndLoc due to the ']'.
+ End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
return X86Operand::CreateMem(Disp, Start, End, Size);
}
- } else if (getLexer().is(AsmToken::Integer)) {
- int64_t Val = Tok.getIntVal();
- Parser.Lex();
- SMLoc Loc = Tok.getLoc();
- if (getLexer().is(AsmToken::RBrac)) {
- // Handle '[' number ']'
- End = Parser.getTok().getEndLoc();
- Parser.Lex();
- const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
- if (SegReg)
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
- Start, End, Size);
- return X86Operand::CreateMem(Disp, Start, End, Size);
- } else if (getLexer().is(AsmToken::Star)) {
- // Handle '[' Scale*IndexReg ']'
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else
- return ErrorOperand(Loc, "Unexpected token");
}
- // Parse ][ as a plus.
- bool ExpectRBrac = true;
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- End = Parser.getTok().getEndLoc();
- Parser.Lex();
- }
+ // Parse [ BaseReg + Scale*IndexReg + Disp ].
+ bool Done = false;
+ IntelBracExprStateMachine SM(Parser);
+
+ // If we parsed a register, then the end loc has already been set and
+ // the identifier has already been lexed. We also need to update the
+ // state.
+ if (TmpReg)
+ SM.onRegister(TmpReg);
+
+ const MCExpr *Disp = 0;
+ while (!Done) {
+ bool UpdateLocLex = true;
- if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- bool isPlus = getLexer().is(AsmToken::Plus) ||
- getLexer().is(AsmToken::LBrac);
- Parser.Lex();
- SMLoc PlusLoc = Tok.getLoc();
- if (getLexer().is(AsmToken::Integer)) {
+ // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
+ // identifier. Don't try to parse it as a register.
+ if (Tok.getString().startswith("."))
+ break;
+
+ switch (getLexer().getKind()) {
+ default: {
+ if (SM.isValidEndState()) {
+ Done = true;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ }
+ case AsmToken::Identifier: {
+ // This could be a register or a displacement expression.
+ if(!ParseRegister(TmpReg, Start, End)) {
+ SM.onRegister(TmpReg);
+ UpdateLocLex = false;
+ break;
+ } else if (!getParser().parseExpression(Disp, End)) {
+ SM.onDispExpr();
+ UpdateLocLex = false;
+ break;
+ }
+ return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ }
+ case AsmToken::Integer: {
int64_t Val = Tok.getIntVal();
- Parser.Lex();
- if (getLexer().is(AsmToken::Star)) {
- Parser.Lex();
- SMLoc IdxRegLoc = Tok.getLoc();
- if (ParseRegister(IndexReg, IdxRegLoc, End))
- return ErrorOperand(IdxRegLoc, "Expected register");
- Scale = Val;
- } else if (getLexer().is(AsmToken::RBrac)) {
- const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
- Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
- } else
- return ErrorOperand(PlusLoc, "unexpected token after +");
- } else if (getLexer().is(AsmToken::Identifier)) {
- // This could be an index register or a displacement expression.
- if (!IndexReg)
- ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End))
- return 0;
+ SM.onInteger(Val);
+ break;
}
- }
-
- // Parse ][ as a plus.
- if (getLexer().is(AsmToken::RBrac)) {
- ExpectRBrac = false;
- End = Parser.getTok().getEndLoc();
- Parser.Lex();
- if (getLexer().is(AsmToken::LBrac)) {
- ExpectRBrac = true;
- Parser.Lex();
- if (getParser().ParseExpression(Disp, End))
- return 0;
+ case AsmToken::Plus: SM.onPlus(); break;
+ case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Star: SM.onStar(); break;
+ case AsmToken::LBrac: SM.onLBrac(); break;
+ case AsmToken::RBrac: SM.onRBrac(); break;
+ }
+ if (!Done && UpdateLocLex) {
+ End = Tok.getLoc();
+ Parser.Lex(); // Consume the token.
}
- } else if (ExpectRBrac) {
- if (getParser().ParseExpression(Disp, End))
- return 0;
}
- if (ExpectRBrac) {
- if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(End, "expected ']' token!");
- End = Parser.getTok().getEndLoc();
- Parser.Lex();
- }
+ if (!Disp)
+ Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
@@ -797,10 +990,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
Disp = NewDisp;
}
+ int BaseReg = SM.getBaseReg();
+ int IndexReg = SM.getIndexReg();
+
// handle [-42]
- if (!BaseReg && !IndexReg)
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!BaseReg && !IndexReg) {
+ if (!SegReg)
+ return X86Operand::CreateMem(Disp, Start, End);
+ else
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ }
+ int Scale = SM.getScale();
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
Start, End, Size);
}
@@ -832,28 +1033,43 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
}
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
- if (getParser().ParseExpression(Disp, End))
+ if (getParser().parseExpression(Disp, End))
return 0;
bool NeedSizeDir = false;
- if (!Size && isParsingInlineAsm()) {
+ bool IsVarDecl = false;
+ if (isParsingInlineAsm()) {
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other than an
// identifier.
// FIXME: Pass a valid SMLoc.
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size)
+ Size = tType * 8; // Size is in terms of bits in this context.
NeedSizeDir = Size > 0;
}
}
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else
+ else {
+ // If this is not a VarDecl, then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
// When parsing inline assembly we set the base register to a non-zero value
// as we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
/*Scale*/1, Start, End, Size, NeedSizeDir);
+ }
}
/// Parse the '.' operator.
@@ -919,7 +1135,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return ErrorOperand(Start, "Unable to parse expression!");
// Don't emit the offset operator.
@@ -929,13 +1145,23 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
+ return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
+ OffsetOfLoc);
}
-/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or
-/// C++ type or variable. If the variable is an array, TYPE returns the size of
-/// a single element of the array.
-X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
+enum IntelOperatorKind {
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE
+};
+
+/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
+/// returns the number of elements in an array. It returns the value 1 for
+/// non-array variables. The SIZE operator returns the size of a C or C++
+/// variable. A variable's size is the product of its LENGTH and TYPE. The
+/// TYPE operator returns the size of a C or C++ type or variable. If the
+/// variable is an array, TYPE returns the size of a single element.
+X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
SMLoc TypeLoc = Start;
Parser.Lex(); // Eat offset.
Start = Parser.getTok().getLoc();
@@ -943,60 +1169,63 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
SMLoc End;
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
- unsigned Size = 0;
+ unsigned Length = 0, Size = 0, Type = 0;
if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
const MCSymbol &Sym = SymRef->getSymbol();
// FIXME: The SemaLookup will fail if the name is anything other than an
// identifier.
// FIXME: Pass a valid SMLoc.
- if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
- return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
-
- Size /= 8; // Size is in terms of bits, but we want bytes in the context.
+ bool IsVarDecl;
+ if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
+ Size, Type, IsVarDecl))
+ return ErrorOperand(Start, "Unable to lookup expr!");
+ }
+ unsigned CVal;
+ switch(OpKind) {
+ default: llvm_unreachable("Unexpected operand kind!");
+ case IOK_LENGTH: CVal = Length; break;
+ case IOK_SIZE: CVal = Size; break;
+ case IOK_TYPE: CVal = Type; break;
}
// Rewrite the type operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
- const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
+ const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
}
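A standalone C++ sketch, not part of this patch, of what the three operators evaluate to for a hypothetical declaration int arr[10] with 4-byte ints, following the doc comment above: SIZE is the product of LENGTH and TYPE.

#include <cstdio>

int main() {
  int arr[10];
  unsigned Length = sizeof(arr) / sizeof(arr[0]);  // LENGTH arr -> 10
  unsigned Type   = sizeof(arr[0]);                // TYPE arr   -> 4
  unsigned Size   = Length * Type;                 // SIZE arr   -> 40
  std::printf("LENGTH=%u TYPE=%u SIZE=%u\n", Length, Type, Size);
  return 0;
}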
X86Operand *X86AsmParser::ParseIntelOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
-
- // offset operator.
StringRef AsmTokStr = Parser.getTok().getString();
- if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
- isParsingInlineAsm())
- return ParseIntelOffsetOfOperator(Start);
-
- // Type directive.
- if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
- isParsingInlineAsm())
- return ParseIntelTypeOperator(Start);
-
- // Unsupported directives.
- if (isParsingIntelSyntax() &&
- (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
- AsmTokStr == "length" || AsmTokStr == "LENGTH"))
- return ErrorOperand(Start, "Unsupported directive!");
-
- // immediate.
+
+ // Offset, length, type and size operators.
+ if (isParsingInlineAsm()) {
+ if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ return ParseIntelOffsetOfOperator(Start);
+ if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
+ return ParseIntelOperator(Start, IOK_LENGTH);
+ if (AsmTokStr == "size" || AsmTokStr == "SIZE")
+ return ParseIntelOperator(Start, IOK_SIZE);
+ if (AsmTokStr == "type" || AsmTokStr == "TYPE")
+ return ParseIntelOperator(Start, IOK_TYPE);
+ }
+
+ // Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
- if (!getParser().ParseExpression(Val, End)) {
+ if (!getParser().parseExpression(Val, End)) {
return X86Operand::CreateImm(Val, Start, End);
}
}
- // register
+ // Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
@@ -1008,7 +1237,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(RegNo, Start);
}
- // mem operand
+ // Memory operand.
return ParseIntelMemOperand(0, Start);
}
@@ -1042,7 +1271,7 @@ X86Operand *X86AsmParser::ParseATTOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
Parser.Lex();
const MCExpr *Val;
- if (getParser().ParseExpression(Val, End))
+ if (getParser().parseExpression(Val, End))
return 0;
return X86Operand::CreateImm(Val, Start, End);
}
@@ -1060,7 +1289,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
- if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
+ if (getParser().parseExpression(Disp, ExprEnd)) return 0;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
@@ -1086,7 +1315,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc ExprEnd;
// It must be an parenthesized expression, parse it now.
- if (getParser().ParseParenExpression(Disp, ExprEnd))
+ if (getParser().parseParenExpression(Disp, ExprEnd))
return 0;
// After parsing the base expression we could either have a parenthesized
@@ -1146,7 +1375,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ if (getParser().parseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
return 0;
}
@@ -1165,7 +1394,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
- if (getParser().ParseAbsoluteExpression(Value))
+ if (getParser().parseAbsoluteExpression(Value))
return 0;
if (Value != 1)
@@ -1306,7 +1535,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
@@ -1317,14 +1546,14 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
else {
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return true;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.EatToEndOfStatement();
+ Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -2014,10 +2243,10 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- if (getParser().ParseExpression(Value))
+ if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+ getParser().getStreamer().EmitValue(Value, Size);
if (getLexer().is(AsmToken::EndOfStatement))
break;
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 64ac5e6..0f6eeb1 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -34,10 +34,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- Src2Name = getRegName(MI->getOperand(2).getReg());
- DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
- break;
case X86::VINSERTPSrr:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -46,10 +42,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVLHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVLHPSMask(2, ShuffleMask);
- break;
case X86::VMOVLHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -58,10 +50,6 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::MOVHLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeMOVHLPSMask(2, ShuffleMask);
- break;
case X86::VMOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -69,6 +57,29 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::PALIGNR128rr:
+ case X86::VPALIGNR128rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PALIGNR128rm:
+ case X86::VPALIGNR128rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPALIGNR256rr:
+ Src1Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPALIGNR256rm:
+ Src2Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePALIGNRMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PSHUFDri:
case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -131,15 +142,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKHBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKHBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHBWrm:
case X86::VPUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -154,15 +160,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHWDrm:
case X86::VPUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -177,15 +178,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHDQrm:
case X86::VPUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -200,15 +196,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
case X86::VPUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -224,15 +215,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::PUNPCKLBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLBWrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
- break;
case X86::VPUNPCKLBWrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLBWrm:
case X86::VPUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -247,15 +233,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLWDrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
- break;
case X86::VPUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLWDrm:
case X86::VPUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -270,15 +251,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
- break;
case X86::VPUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLDQrm:
case X86::VPUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -293,15 +269,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
- break;
case X86::VPUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
case X86::VPUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -317,16 +288,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPDrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPDrmi:
- DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPDrmi:
case X86::VSHUFPDrmi:
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -344,16 +309,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::SHUFPSrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::SHUFPSrmi:
- DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::SHUFPSrmi:
case X86::VSHUFPSrmi:
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -371,15 +330,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::UNPCKLPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPDrm:
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPDrm:
case X86::VUNPCKLPDrm:
DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -394,15 +348,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPSrm:
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKLPSrm:
case X86::VUNPCKLPSrm:
DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -417,15 +366,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPDrm:
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPDrm:
case X86::VUNPCKHPDrm:
DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -440,15 +384,10 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPSrm:
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
- break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::UNPCKHPSrm:
case X86::VUNPCKHPSrm:
DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 7ea1961..9e68388 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -104,7 +104,7 @@ namespace X86II {
/// MO_TLSLD - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// __tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the x86-64 local dynamic TLS access model.
///
@@ -114,7 +114,7 @@ namespace X86II {
/// MO_TLSLDM - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
- /// contains the symbol. When this index is passed to a call to to
+ /// contains the symbol. When this index is passed to a call to
/// ___tls_get_addr, the function will return the base address of the TLS
/// block for the symbol. Used in the IA32 local dynamic TLS access model.
///
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 16488eb..7815ae9 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -44,7 +44,7 @@ void X86MCAsmInfoDarwin::anchor() { }
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
- PointerSize = 8;
+ PointerSize = CalleeSaveStackSlotSize = 8;
AssemblerDialect = AsmWriterFlavor;
@@ -76,8 +76,16 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
void X86ELFMCAsmInfo::anchor() { }
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
- if (T.getArch() == Triple::x86_64)
- PointerSize = 8;
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+ // For ELF, x86-64 pointer size depends on the ABI.
+ // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
+ // with the x32 ABI, pointer size remains the default 4.
+ PointerSize = (is64Bit && !isX32) ? 8 : 4;
+
+ // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
+ CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
AssemblerDialect = AsmWriterFlavor;
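
A minimal standalone sketch (not part of the patch) of the values the two settings above take for the three relevant configurations; the x32 case is the one that motivates splitting PointerSize from CalleeSaveStackSlotSize:

  #include <cstdio>

  // Mirrors the logic above: pointer size follows the data model,
  // stack slot size follows the architecture.
  static void report(const char *Triple, bool is64Bit, bool isX32) {
    unsigned PointerSize = (is64Bit && !isX32) ? 8 : 4;
    unsigned CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
    std::printf("%-22s pointers: %u  stack slots: %u\n",
                Triple, PointerSize, CalleeSaveStackSlotSize);
  }

  int main() {
    report("i386-linux-gnu", false, false);    // 4 / 4
    report("x86_64-linux-gnu", true, false);   // 8 / 8
    report("x86_64-linux-gnux32", true, true); // 4 / 8
    return 0;
  }
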
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 4011035..496b704 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -953,3 +953,12 @@ similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should
turn into hsubpd also.
//===---------------------------------------------------------------------===//
+
+define <2 x i32> @foo(<2 x double> %in) {
+ %x = fptosi <2 x double> %in to <2 x i32>
+ ret <2 x i32> %x
+}
+
+Should compile into cvttpd2dq instead of being scalarized into 2 cvttsd2si.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 8b87c1f..bbd4904 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -61,6 +61,24 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(NElts+i);
}
+void DecodePALIGNRMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Base = i + Offset;
+      // If i+Offset runs past this lane, the element comes from the other source.
+ if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
+ ShuffleMask.push_back(Base + l);
+ }
+ }
+}
+
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
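
To make the lane arithmetic in DecodePALIGNRMask above concrete, here is a standalone re-statement of the same index computation (not the LLVM code itself), specialized to a 128-bit PALIGNR with an immediate of 4; mask indices of 16 and above select from the second source operand:

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned NumElts = 16;                   // v16i8
    const unsigned NumLanes = 1;                   // 128 bits -> one lane
    const unsigned NumLaneElts = NumElts / NumLanes;
    const unsigned Offset = 4 * 1;                 // imm * bytes per element

    std::vector<int> Mask;
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Base = i + Offset;
        // Past the end of the lane: the element comes from the other source.
        // (With a single lane the adjustment is zero, but Base is already
        // >= NumElts, which is how shuffle masks refer to the second input.)
        if (Base >= NumLaneElts)
          Base += NumElts - NumLaneElts;
        Mask.push_back(Base + l);
      }

    // Prints 4 5 ... 15 16 17 18 19: bytes 4..15 of the first source
    // followed by bytes 0..3 of the second, i.e. what "palignr $4" does.
    for (size_t i = 0; i != Mask.size(); ++i)
      std::printf("%d ", Mask[i]);
    std::printf("\n");
    return 0;
  }
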
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 70d8171..017ab32 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -35,6 +35,8 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
+void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3ab2899..0216252 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,6 +120,8 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
+def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
+ "Support ADX instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 5b3e0ba..ac5daec 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -252,14 +252,15 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
case MachineOperand::MO_Immediate:
- O << '$' << MO.getImm();
+ if (AsmVariant == 0) O << '$';
+ O << MO.getImm();
return;
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
- O << '$';
+ if (AsmVariant == 0) O << '$';
printSymbolOperand(MO, O);
break;
}
@@ -355,19 +356,23 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
NeedPlus = true;
}
- assert (DispSpec.isImm() && "Displacement is not an immediate!");
- int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
- if (NeedPlus) {
- if (DispVal > 0)
- O << " + ";
- else {
- O << " - ";
- DispVal = -DispVal;
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ printOperand(MI, Op+3, O, Modifier, AsmVariant);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
}
+ O << DispVal;
}
- O << DispVal;
- }
+ }
O << ']';
}
@@ -543,7 +548,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MCSA_IndirectSymbol);
// hlt; hlt; hlt; hlt; hlt hlt = 0xf4.
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
- OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
+ OutStreamer.EmitBytes(StringRef(HltInsts, 5));
}
Stubs.clear();
@@ -569,7 +574,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long 0
if (MCSym.getInt())
// External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitIntValue(0, 4/*size*/);
else
// Internal to current translation unit.
//
@@ -578,8 +583,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// using NLPs. However, sometimes the types are local to the file. So
// we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -596,8 +600,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::
Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/, 0/*addrspace*/);
+ OutContext), 4/*size*/);
}
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -663,7 +666,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
name += ",DATA";
else
name += ",data";
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
@@ -672,7 +675,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
else
name = " -export:";
name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name, 0);
+ OutStreamer.EmitBytes(name);
}
}
}
@@ -692,7 +695,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer.EmitLabel(Stubs[i].first);
OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(), 0);
+ TD->getPointerSize());
}
Stubs.clear();
}
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 61eb14e..bc7496b 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,10 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// AT&T assembly code printer class.
-//
-//===----------------------------------------------------------------------===//
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
@@ -35,7 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
}
virtual const char *getPassName() const LLVM_OVERRIDE {
- return "X86 AT&T-Style Assembly Printer";
+ return "X86 Assembly / Object Emitter";
}
const X86Subtarget &getSubtarget() const { return *Subtarget; }
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 7ad2fdd..b516be0 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -519,6 +519,9 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
+ R11, R12, R13, R14, R15, RBP,
+ (sequence "XMM%u", 0, 15))>;
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index bc77334..ece38aa 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -124,7 +124,7 @@ template<class CodeEmitter>
} // end anonymous namespace.
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
-/// to the specified templated MachineCodeEmitter object.
+/// to the specified JITCodeEmitter object.
FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE) {
return new Emitter<JITCodeEmitter>(TM, JCE);
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index f321778..69b4c71 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -11,7 +11,7 @@
;;
;;===----------------------------------------------------------------------===
-extrn X86CompilationCallback2: PROC
+extrn LLVMX86CompilationCallback2: PROC
.code
X86CompilationCallback proc
@@ -42,7 +42,7 @@ X86CompilationCallback proc
; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
; Restore all XMM arg registers.
movaps xmm3, [rsp+48+32]
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5facb7b..b5c3270 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -75,6 +75,8 @@ public:
virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
+ virtual bool FastLowerArguments();
+
#include "X86GenFastISel.inc"
private:
@@ -326,12 +328,11 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
+ if (RR == 0)
return false;
+
+ ResultReg = RR;
+ return true;
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
@@ -727,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Don't handle popping bytes on return for now.
if (X86MFInfo->getBytesToPopOnReturn() != 0)
- return 0;
+ return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
@@ -738,6 +739,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
@@ -805,8 +809,8 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
}
// The x86-64 ABI for returning structs by value requires that we copy
@@ -819,11 +823,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
"SRetReturnReg should have been set in LowerFormalArguments()!");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
X86::RAX).addReg(Reg);
- MRI.addLiveOut(X86::RAX);
+ RetRegs.push_back(X86::RAX);
}
// Now emit the RET.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
@@ -1372,7 +1379,6 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert(Len == 1);
VT = MVT::i8;
}
@@ -1516,6 +1522,78 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
}
+bool X86FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ if (!Subtarget->is64Bit())
+ return false;
+
+  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (Idx > 6)
+ return false;
+
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = I->getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ case MVT::i64:
+ break;
+ default:
+ return false;
+ }
+ }
+
+ static const uint16_t GPR32ArgRegs[] = {
+ X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
+ };
+ static const uint16_t GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
+ };
+
+ Idx = 0;
+ const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++Idx) {
+ if (I->use_empty())
+ continue;
+ bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
+ const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
+ unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(I, ResultReg);
+ }
+ return true;
+}
+
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
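
As a quick reference for the FastLowerArguments fast path added above, the first six integer arguments come straight from the fixed SysV x86-64 argument registers; a small sketch (not from the patch) of the mapping it relies on:

  #include <cstdio>

  int main() {
    static const char *GPR32[] = {"EDI", "ESI", "EDX", "ECX", "R8D", "R9D"};
    static const char *GPR64[] = {"RDI", "RSI", "RDX", "RCX", "R8",  "R9"};
    // e.g. for "long f(long a, int b, long c)": a <- RDI, b <- ESI, c <- RDX.
    // Anything else (more than 6 args, vectors/aggregates, byval/sret/nest,
    // varargs, non-C calling conventions) falls back to the regular path.
    for (int i = 0; i != 6; ++i)
      std::printf("arg %d: i32 in %s, i64 in %s\n", i + 1, GPR32[i], GPR64[i]);
    return 0;
  }
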
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 420aeb8..a05cf5c 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -50,13 +50,13 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
+ MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.callsEHReturn());
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::SUB64ri8;
return X86::SUB64ri32;
@@ -67,8 +67,8 @@ static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
+static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
+ if (IsLP64) {
if (isInt<8>(Imm))
return X86::ADD64ri8;
return X86::ADD64ri32;
@@ -79,8 +79,8 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
-static unsigned getLEArOpcode(unsigned is64Bit) {
- return is64Bit ? X86::LEA64r : X86::LEA32r;
+static unsigned getLEArOpcode(unsigned IsLP64) {
+ return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
@@ -145,17 +145,17 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, bool UseLEA,
+ bool Is64Bit, bool IsLP64, bool UseLEA,
const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
unsigned Opc;
if (UseLEA)
- Opc = getLEArOpcode(Is64Bit);
+ Opc = getLEArOpcode(IsLP64);
else
Opc = isSub
- ? getSUBriOpcode(Is64Bit, Offset)
- : getADDriOpcode(Is64Bit, Offset);
+ ? getSUBriOpcode(IsLP64, Offset)
+ : getADDriOpcode(IsLP64, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -660,6 +660,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
@@ -711,7 +712,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta)
@@ -927,7 +928,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// MSVC x64's __chkstk needs to adjust %rsp.
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
if (isEAXAlive) {
@@ -939,7 +940,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MBB.insert(MBBI, MI);
}
} else if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
// If we need a base pointer, set it up here. It's whatever the value
@@ -996,6 +997,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool IsLP64 = STI.isTarget64BitLP64();
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -1081,7 +1083,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (RegInfo->needsStackRealignment(MF))
MBBI = FirstCSPop;
if (CSSize != 0) {
- unsigned Opc = getLEArOpcode(Is64Bit);
+ unsigned Opc = getLEArOpcode(IsLP64);
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
FramePtr, false, -CSSize);
} else {
@@ -1091,7 +1093,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
+ TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1126,7 +1129,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
+ UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1169,7 +1173,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
+ *RegInfo);
}
}
@@ -1382,16 +1387,25 @@ HasNestArgument(const MachineFunction *MF) {
}
-/// GetScratchRegister - Get a register for performing work in the segmented
-/// stack prologue. Depending on platform and the properties of the function
-/// either one or two registers will be needed. Set primary to true for
-/// the first register, false for the second.
+/// GetScratchRegister - Get a temp register for performing work in the
+/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
+/// and the properties of the function either one or two registers will be
+/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+
+ // Erlang stuff.
+ if (CallingConvention == CallingConv::HiPE) {
+ if (Is64Bit)
+ return Primary ? X86::R14 : X86::R13;
+ else
+ return Primary ? X86::EBX : X86::EDI;
+ }
+
if (Is64Bit)
return Primary ? X86::R11 : X86::R12;
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
if (CallingConvention == CallingConv::X86_FastCall ||
@@ -1419,7 +1433,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
- const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
@@ -1427,8 +1440,8 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
- !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
+ !STI.isTargetWin32() && !STI.isTargetFreeBSD())
report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
@@ -1466,13 +1479,13 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::FS;
TlsOffset = 0x70;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
TlsReg = X86::FS;
TlsOffset = 0x18;
} else {
@@ -1488,16 +1501,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- if (ST->isTargetLinux()) {
+ if (STI.isTargetLinux()) {
TlsReg = X86::GS;
TlsOffset = 0x30;
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
TlsReg = X86::GS;
TlsOffset = 0x48 + 90*4;
- } else if (ST->isTargetWin32()) {
+ } else if (STI.isTargetWin32()) {
TlsReg = X86::FS;
TlsOffset = 0x14; // pvArbitrary, reserved for application use
- } else if (ST->isTargetFreeBSD()) {
+ } else if (STI.isTargetFreeBSD()) {
report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
} else {
report_fatal_error("Segmented stacks not supported on this platform.");
@@ -1509,10 +1522,10 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ if (STI.isTargetLinux() || STI.isTargetWin32()) {
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
- } else if (ST->isTargetDarwin()) {
+ } else if (STI.isTargetDarwin()) {
// TlsOffset doesn't fit into a mod r/m byte so we need an extra register
unsigned ScratchReg2;
@@ -1598,3 +1611,229 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.verify();
#endif
}
+
+// Erlang programs may need a special prologue to handle the stack size they
+// might need at runtime. That is because Erlang/OTP does not implement a C
+// stack but uses a custom implementation of a hybrid stack/heap
+// architecture (for more information, see Eric Stenman's Ph.D. thesis:
+// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf).
+//
+//
+// CheckStack:
+// temp0 = sp - MaxStack
+// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+// OldStart:
+// ...
+// IncStack:
+// call inc_stack # doubles the stack space
+// temp0 = sp - MaxStack
+// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const bool Is64Bit = STI.is64Bit();
+ DebugLoc DL;
+ // HiPE-specific values
+ const unsigned HipeLeafWords = 24;
+ const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
+ const unsigned Guaranteed = HipeLeafWords * SlotSize;
+ unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
+ MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
+ unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+
+ assert(STI.isTargetLinux() &&
+ "HiPE prologue is only supported on Linux operating systems.");
+
+ // Compute the largest caller's frame that is needed to fit the callees'
+ // frames. This 'MaxStack' is computed from:
+ //
+ // a) the fixed frame size, which is the space needed for all spilled temps,
+ // b) outgoing on-stack parameter areas, and
+ // c) the minimum stack space this function needs to make available for the
+ // functions it calls (a tunable ABI property).
+ if (MFI->hasCalls()) {
+ unsigned MoreStackForCalls = 0;
+
+ for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+ MBBI != MBBE; ++MBBI)
+ for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
+ MI != ME; ++MI) {
+ if (!MI->isCall())
+ continue;
+
+ // Get callee operand.
+ const MachineOperand &MO = MI->getOperand(0);
+
+ // Only take account of global function calls (no closures etc.).
+ if (!MO.isGlobal())
+ continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+        // Do not update 'MaxStack' for primitive and built-in functions.
+        // These are encoded with names that either start with "erlang."/"bif_"
+        // or contain neither a "." (as a <Module>.<Function>.<Arity> name
+        // would) nor an "_" (as the BIF "suspend_0" does); they are executed
+        // on another stack.
+ if (F->getName().find("erlang.") != StringRef::npos ||
+ F->getName().find("bif_") != StringRef::npos ||
+ F->getName().find_first_of("._") == StringRef::npos)
+ continue;
+
+ unsigned CalleeStkArity =
+ F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
+ if (HipeLeafWords - 1 > CalleeStkArity)
+ MoreStackForCalls = std::max(MoreStackForCalls,
+ (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
+ }
+ MaxStack += MoreStackForCalls;
+ }
+
+  // If the stack frame needed is larger than the guaranteed size, then runtime
+  // checks and calls to the "inc_stack_0" BIF should be inserted in the
+  // assembly prologue.
+ if (MaxStack > Guaranteed) {
+ MachineBasicBlock &prologueMBB = MF.front();
+ MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
+
+ for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
+ E = prologueMBB.livein_end(); I != E; I++) {
+ stackCheckMBB->addLiveIn(*I);
+ incStackMBB->addLiveIn(*I);
+ }
+
+ MF.push_front(incStackMBB);
+ MF.push_front(stackCheckMBB);
+
+ unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
+ unsigned LEAop, CMPop, CALLop;
+ if (Is64Bit) {
+ SPReg = X86::RSP;
+ PReg = X86::RBP;
+ LEAop = X86::LEA64r;
+ CMPop = X86::CMP64rm;
+ CALLop = X86::CALL64pcrel32;
+ SPLimitOffset = 0x90;
+ } else {
+ SPReg = X86::ESP;
+ PReg = X86::EBP;
+ LEAop = X86::LEA32r;
+ CMPop = X86::CMP32rm;
+ CALLop = X86::CALLpcrel32;
+ SPLimitOffset = 0x4c;
+ }
+
+ ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+ assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
+ "HiPE prologue scratch register is live-in");
+
+ // Create new MBB for StackCheck:
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ // SPLimitOffset is in a fixed heap location (pointed by BP).
+ addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
+
+ // Create new MBB for IncStack:
+ BuildMI(incStackMBB, DL, TII.get(CALLop)).
+ addExternalSymbol("inc_stack_0");
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
+ SPReg, false, -MaxStack);
+ addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
+ .addReg(ScratchReg), PReg, false, SPLimitOffset);
+ BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
+
+ stackCheckMBB->addSuccessor(&prologueMBB, 99);
+ stackCheckMBB->addSuccessor(incStackMBB, 1);
+ incStackMBB->addSuccessor(&prologueMBB, 99);
+ incStackMBB->addSuccessor(incStackMBB, 1);
+ }
+#ifdef XDEBUG
+ MF.verify();
+#endif
+}
+
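
A worked example (my numbers, not from the patch) of the MaxStack/Guaranteed comparison that decides whether adjustForHiPEPrologue above emits the stack check at all:

  #include <cstdio>

  int main() {
    // x86-64: 8-byte slots, 6 register-passed arguments, 24 leaf words.
    const unsigned SlotSize = 8, CCRegisteredArgs = 6, HipeLeafWords = 24;
    const unsigned Guaranteed = HipeLeafWords * SlotSize;            // 192 bytes

    // Hypothetical function: 8 arguments, 160-byte frame, no extra calls.
    unsigned ArgCount = 8, FrameSize = 160;
    unsigned CallerStkArity = ArgCount > CCRegisteredArgs
                                  ? ArgCount - CCRegisteredArgs : 0;        // 2
    unsigned MaxStack = FrameSize + CallerStkArity * SlotSize + SlotSize; // 184

    // 184 <= 192: the guaranteed area is enough and no check is emitted.
    // With a 240-byte frame, MaxStack would be 264 > 192 and the
    // stackCheckMBB/incStackMBB blocks would be inserted.
    std::printf("Guaranteed=%u MaxStack=%u -> %s\n", Guaranteed, MaxStack,
                MaxStack > Guaranteed ? "emit check" : "no check");
    return 0;
  }
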
+void X86FrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ unsigned StackPtr = RegInfo.getStackRegister();
+  bool reserveCallFrame = hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ bool IsLP64 = STI.isTarget64BitLP64();
+ DebugLoc DL = I->getDebugLoc();
+  uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+  if (!reserveCallFrame) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackup instruction into a 'sub ESP, <amt>' and the
+    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+    // adjcallstackup instruction into an 'add ESP, <amt>'.
+    // TODO: consider using push / pop instead of sub + store / add.
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
+
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(IsLP64, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
+ MBB.insert(I, New);
+ }
+
+ return;
+ }
+
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+  // sure we restore the stack pointer immediately after the call; there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->isCall())
+ --I;
+ MBB.insert(I, New);
+ }
+}
+
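
A small sketch (not from the patch) of the rounding eliminateCallFramePseudoInstr applies before turning the call-frame pseudos into explicit stack adjustments:

  #include <cassert>
  #include <cstdint>

  // Round the outgoing-argument area up to the stack alignment, exactly as
  // "(Amount + StackAlign - 1) / StackAlign * StackAlign" does above.
  static uint64_t alignAmount(uint64_t Amount, unsigned StackAlign) {
    return (Amount + StackAlign - 1) / StackAlign * StackAlign;
  }

  int main() {
    // With 16-byte stack alignment, a 20-byte argument area becomes a
    // 32-byte adjustment, so ADJCALLSTACKDOWN turns into "sub esp, 32".
    assert(alignAmount(20, 16) == 32);
    assert(alignAmount(16, 16) == 16);
    assert(alignAmount(0, 16) == 0);
    return 0;
  }
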
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index dc515dc..3f08b9a 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -43,6 +43,8 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF) const;
+ void adjustForHiPEPrologue(MachineFunction &MF) const;
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -63,6 +65,10 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 935f9bd..00fbe69 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -280,13 +280,13 @@ namespace {
/// getTargetMachine - Return a reference to the TargetMachine, casted
/// to the target-specific type.
- const X86TargetMachine &getTargetMachine() {
+ const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
/// getInstrInfo - Return a reference to the TargetInstrInfo, casted
/// to the target-specific type.
- const X86InstrInfo *getInstrInfo() {
+ const X86InstrInfo *getInstrInfo() const {
return getTargetMachine().getInstrInfo();
}
};
@@ -446,7 +446,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
(N->getOpcode() == X86ISD::CALL ||
(N->getOpcode() == X86ISD::TC_RETURN &&
- // Only does this if load can be foled into TC_RETURN.
+ // Only does this if load can be folded into TC_RETURN.
(Subtarget->is64Bit() ||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
/// Also try moving call address load from outside callseq_start to just
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4ab92ad..1c3b9ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -605,10 +605,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
@@ -633,8 +635,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
// We don't support sin/cos/fmod
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
@@ -644,8 +647,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
@@ -659,10 +663,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f32 , Expand);
- setOperationAction(ISD::FSIN , MVT::f64 , Expand);
- setOperationAction(ISD::FCOS , MVT::f32 , Expand);
- setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
@@ -699,8 +705,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (!TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FSIN , MVT::f80 , Expand);
- setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ setOperationAction(ISD::FSIN , MVT::f80, Expand);
+ setOperationAction(ISD::FCOS , MVT::f80, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
@@ -748,7 +755,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
@@ -1047,6 +1056,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
+ setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@@ -1111,6 +1122,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
+ setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
+
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1166,6 +1179,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SHL, MVT::v8i32, Legal);
setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1275,6 +1290,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SRA_I128, 0);
}
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->isTargetDarwin()) {
+      // For MacOSX, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
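
For illustration of what the FSINCOS hookup above is for (my example, not from the patch): when both the sine and the cosine of the same value are needed, the two calls are candidates for a single combined sincos libcall, or __sincos_stret on MacOSX, instead of two separate calls:

  #include <cmath>

  // With optimization enabled on a subtarget where hasSinCos() is true, the
  // sin/cos pair below may be merged into one sincos-style call.
  void polar_to_cartesian(float r, float theta, float *x, float *y) {
    *x = r * std::cos(theta);
    *y = r * std::sin(theta);
  }
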
@@ -1295,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1306,17 +1335,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// On Darwin, -Os means optimize for size without hurting performance,
// do not reduce the limit.
- maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
- maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
// Predictable cmov don't hurt on atom because it's in-order.
- predictableSelectIsExpensive = !Subtarget->isAtom();
+ PredictableSelectIsExpensive = !Subtarget->isAtom();
setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1562,14 +1591,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
- // Add the regs to the liveout set for the function.
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
- MRI.addLiveOut(RVLocs[i].getLocReg());
-
SDValue Flag;
-
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
@@ -1638,12 +1660,13 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1653,11 +1676,12 @@ X86TargetLowering::LowerReturn(SDValue Chain,
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
- // RAX now acts like a return value.
- MRI.addLiveOut(X86::RAX);
+ // RAX/EAX now acts like a return value.
+ RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
}
RetOps[0] = Chain; // Update chain.
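
For readers unfamiliar with sret, a small source-level example of the case this hunk handles; the struct is large enough that the x86-64 ABI returns it indirectly, so the callee receives a hidden pointer and, per the comment above, must hand that pointer back in %rax (or %eax on an ILP32 target such as x32). Illustrative sketch only, not part of the patch:

struct Big { long Data[8]; };        // too large to be returned in registers

Big makeBig() {                      // lowered with a hidden 'sret' pointer argument
  Big B = {};
  B.Data[0] = 42;
  return B;                          // the hidden pointer comes back in %rax/%eax
}
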
@@ -2009,14 +2033,16 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -2630,8 +2656,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns, not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl,
- NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -2789,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
@@ -2828,7 +2853,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -2948,9 +2973,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
- !isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
+ ((!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) ||
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
+ // In PIC we need an extra register to formulate the address computation
+ // for the callee.
+ unsigned MaxInRegs =
+ (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
@@ -2959,7 +2990,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
- if (++NumInRegs == 3)
+ if (++NumInRegs == MaxInRegs)
return false;
break;
}
@@ -2995,7 +3026,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -3045,7 +3076,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::SHUFP:
case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -3355,8 +3386,8 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// is suitable for input to PALIGNR.
static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
const X86Subtarget *Subtarget) {
- if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
- (VT.getSizeInBits() == 256 && !Subtarget->hasInt256()))
+ if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
+ (VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
@@ -3445,7 +3476,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// reverse of what x86 shuffles want.
static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
bool Commuted = false) {
- if (!HasFp256 && VT.getSizeInBits() == 256)
+ if (!HasFp256 && VT.is256BitVector())
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -3580,7 +3611,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
DebugLoc dl = SVOp->getDebugLoc();
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3630,7 +3661,7 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3669,7 +3700,7 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3700,14 +3731,14 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
- bool HasInt256) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ bool Is256BitVec = VT.is256BitVector();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3715,7 +3746,7 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
// FIXME: Need a better way to get rid of this, there's no latency difference
// between UNPCKLPD and MOVDDUP, the latter should always be checked first and
// the former later. We should also remove the "_undef" special mask.
- if (NumElts == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && Is256BitVec)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3749,7 +3780,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
(!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
@@ -3831,7 +3862,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -3865,7 +3896,7 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
unsigned NumElts = VT.getVectorNumElements();
// Only match 256-bit with 32/64-bit types
- if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
+ if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
return false;
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3921,8 +3952,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i+1" is the value the indexed mask element must have
@@ -3944,8 +3975,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
- (VT.getSizeInBits() == 256 && NumElems != 8))
+ if ((VT.is128BitVector() && NumElems != 4) ||
+ (VT.is256BitVector() && NumElems != 8))
return false;
// "i" is the value the indexed mask element must have
@@ -4005,9 +4036,8 @@ bool X86::isVEXTRACTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4024,9 +4054,8 @@ bool X86::isVINSERTF128Index(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- unsigned VL = N->getValueType(0).getVectorNumElements();
- unsigned VBits = N->getValueType(0).getSizeInBits();
- unsigned ElSize = VBits / VL;
+ MVT VT = N->getValueType(0).getSimpleVT();
+ unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % 128 == 0;
return Result;
@@ -4036,7 +4065,7 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for PSHUF/SHUFP");
@@ -4066,7 +4095,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4090,7 +4119,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4114,7 +4143,7 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
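
As background for the immediate computed here, a byte-level model of the PALIGNR operation itself (per the instruction's definition: concatenate the destination operand over the source operand and shift the composite right by Imm bytes). This restates the ISA semantics, not the DAG node's operand order, and is a sketch rather than the lowering code:

#include <array>
#include <cstdint>

// result = low 16 bytes of ((Dst:Src) >> (Imm * 8)), with Dst in the high half.
std::array<uint8_t, 16> palignrModel(const std::array<uint8_t, 16> &Dst,
                                     const std::array<uint8_t, 16> &Src,
                                     unsigned Imm) {
  std::array<uint8_t, 16> Out{};
  for (unsigned I = 0; I != 16; ++I) {
    unsigned Byte = I + Imm;              // position in the 32-byte composite
    if (Byte < 16)
      Out[I] = Src[Byte];                 // still inside the source half
    else if (Byte < 32)
      Out[I] = Dst[Byte - 16];            // spilled into the destination half
    // bytes shifted past the composite are zero
  }
  return Out;
}
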
@@ -4145,8 +4174,8 @@ unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- EVT VecVT = N->getOperand(0).getValueType();
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
@@ -4162,8 +4191,8 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- EVT VecVT = N->getValueType(0);
- EVT ElVT = VecVT.getVectorElementType();
+ MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
return Index / NumElemsPerChunk;
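
The two helpers above reduce to the same index arithmetic; a minimal sketch of that math with illustrative names:

// Which 128-bit half of a 256-bit vector holds (or receives) element `Index`,
// for elements that are `EltBits` wide. 0 selects the low half, 1 the high half.
unsigned lane128Immediate(unsigned Index, unsigned EltBits) {
  unsigned ElemsPerChunk = 128 / EltBits;   // e.g. 4 for f32, 2 for f64
  return Index / ElemsPerChunk;             // e.g. element 6 of v8f32 -> 1
}
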
@@ -4173,7 +4202,7 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
/// Handles 256-bit.
static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+ MVT VT = N->getValueType(0).getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
@@ -4193,17 +4222,18 @@ static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->isNullValue()) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
+ return CN->isNullValue();
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
+ return CFP->getValueAPF().isPosZero();
+ return false;
}
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4352,12 +4382,11 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (Size == 128) { // SSE
+ if (VT.is128BitVector()) { // SSE
if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -4365,7 +4394,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
- } else if (Size == 256) { // AVX
+ } else if (VT.is256BitVector()) { // AVX
if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
@@ -4387,14 +4416,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted into an <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
+static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (Size == 256) {
+ if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
@@ -4402,7 +4430,7 @@ static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
}
- } else if (Size == 128) {
+ } else if (VT.is128BitVector()) {
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else
llvm_unreachable("Unexpected vector type");
@@ -4481,14 +4509,13 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
EVT VT = V.getValueType();
DebugLoc dl = V.getDebugLoc();
- unsigned Size = VT.getSizeInBits();
- if (Size == 128) {
+ if (VT.is128BitVector()) {
V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V);
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32),
&SplatMask[0]);
- } else if (Size == 256) {
+ } else if (VT.is256BitVector()) {
// To use VPERMILPS to splat scalars, the second half of indices must
// refer to the higher part, which is a duplication of the lower one,
// because VPERMILPS can only handle in-lane permutations.
@@ -4512,14 +4539,14 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
int EltNo = SV->getSplatIndex();
int NumElems = SrcVT.getVectorNumElements();
- unsigned Size = SrcVT.getSizeInBits();
+ bool Is256BitVec = SrcVT.is256BitVector();
- assert(((Size == 128 && NumElems > 4) || Size == 256) &&
- "Unknown how to promote splat for type");
+ assert(((SrcVT.is128BitVector() && NumElems > 4) || Is256BitVec) &&
+ "Unknown how to promote splat for type");
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = Extract128BitVector(V1, EltNo, DAG, dl);
if (EltNo >= NumElems/2)
EltNo -= NumElems/2;
@@ -4536,7 +4563,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Recreate the 256-bit vector and place the same 128-bit vector
// into the low and high parts. This is necessary because we want
// to use VPERM* to shuffle the vectors.
- if (Size == 256) {
+ if (Is256BitVec) {
V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1);
}
@@ -4588,6 +4615,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
break;
+ case X86ISD::PALIGNR:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -4631,7 +4662,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
// Not yet implemented
return false;
default: llvm_unreachable("unknown target shuffle node");
@@ -5099,7 +5129,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->hasFp256())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
@@ -5297,8 +5327,8 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5314,7 +5344,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
- if (ISD::isBuildVectorAllOnes(Op.getNode())) {
+ if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
@@ -5629,7 +5659,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
- EVT ResVT = Op.getValueType();
+ MVT ResVT = Op.getValueType().getSimpleVT();
assert(ResVT.is256BitVector() && "Value type must be 256-bit wide");
@@ -5655,8 +5685,8 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
@@ -5667,41 +5697,40 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
// Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumLanes = (NumElems-1)/8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
// Blend for v16i16 should be symmetric for both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
- int SndLaneEltIdx = (NumLanes == 2) ?
+ int SndLaneEltIdx = (NumLanes == 2) ?
SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if ((EltIdx == -1 || EltIdx == (int)i) &&
- (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ if ((EltIdx < 0 || EltIdx == (int)i) &&
+ (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
continue;
- if (((unsigned)EltIdx == (i + NumElems)) &&
- (SndLaneEltIdx == -1 ||
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
MaskValue |= (1<<i);
- else
+ else
return SDValue();
}
// Convert i32 vectors to floating point if AVX2 is not available.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
- EVT BlendVT = VT;
+ MVT BlendVT = VT;
if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
- BlendVT = EVT::getVectorVT(*DAG.getContext(),
- EVT::getFloatingPointVT(EltVT.getSizeInBits()),
- NumElems);
+ BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
}
-
- SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
- DAG.getConstant(MaskValue, MVT::i32));
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
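
The mask-to-immediate loop above can be summarized with a scalar model. The sketch below ignores the two-lane symmetry check that v16i16 additionally requires and uses illustrative names:

#include <vector>

// Returns the BLEND immediate for a shuffle mask over N-element vectors,
// or -1 if some element comes from neither V1[i] nor V2[i].
int blendImmediate(const std::vector<int> &Mask) {
  int N = (int)Mask.size(), Imm = 0;
  for (int I = 0; I != N; ++I) {
    if (Mask[I] < 0 || Mask[I] == I)
      continue;                      // undef, or taken from V1 in place
    if (Mask[I] == I + N)
      Imm |= 1 << I;                 // taken from V2 in place
    else
      return -1;                     // not expressible as a per-element blend
  }
  return Imm;
}
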
@@ -5836,6 +5865,11 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
}
}
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
@@ -5851,7 +5885,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
int EltIdx = MaskVals[i] * 2;
int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx;
int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1;
- pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8));
pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8));
}
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
@@ -5969,6 +6003,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DebugLoc dl = SVOp->getDebugLoc();
ArrayRef<int> MaskVals = SVOp->getMask();
+ // Promote splats to a larger type which usually leads to more efficient code.
+ // FIXME: Is this true if pshufb is available?
+ if (SVOp->isSplat())
+ return PromoteSplat(SVOp, DAG);
+
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
// present, fall back to case 3.
@@ -6087,7 +6126,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -6134,8 +6173,9 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG) {
MVT VT = SVOp->getValueType(0).getSimpleVT();
+ DebugLoc dl = SVOp->getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
unsigned Scale;
@@ -6171,7 +6211,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(EVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, EVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6213,14 +6253,14 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
DebugLoc dl = SVOp->getDebugLoc();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
SDValue Output[2];
SmallVector<int, 16> Mask;
@@ -6325,7 +6365,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getValueType(0).getSimpleVT();
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6579,7 +6619,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// Reduce a vector shuffle to zext.
SDValue
-X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
@@ -6623,9 +6663,10 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+ LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*DAG.getContext(), NBits);
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NeVT, NumElems >> Shift);
+ EVT NeVT = EVT::getIntegerVT(*Context, NBits);
+ EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
if (!isTypeLegal(NVT))
return SDValue();
@@ -6644,8 +6685,21 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// If it's foldable, i.e. a normal load with a single use, we will let code
// selection fold it. Otherwise, we will shorten the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
- (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()))
+ (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
+ if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ // The "ext_vec_elt" node is wider than the result node.
+ // In this case we should extract subvector from V.
+ // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
+ unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
+ EVT FullVT = V.getValueType();
+ EVT SubVecVT = EVT::getVectorVT(*Context,
+ FullVT.getVectorElementType(),
+ FullVT.getVectorNumElements()/Ratio);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
+ DAG.getIntPtrConstant(0));
+ }
V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ }
}
return DAG.getNode(ISD::BITCAST, DL, VT,
@@ -6655,7 +6709,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -6665,25 +6719,14 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
- unsigned NumElem = VT.getVectorNumElements();
- int Size = VT.getSizeInBits();
-
// Use vbroadcast whenever the splat comes from a foldable load
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
if (Broadcast.getNode())
return Broadcast;
-
- // Handle splats by matching through known shuffle masks
- if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem <= 8))
- return SDValue();
-
- // All remaning splats are promoted to target supported vector shuffles.
- return PromoteSplat(SVOp, DAG);
}
// Check integer expanding shuffles.
- SDValue NewOp = lowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, DAG);
if (NewOp.getNode())
return NewOp;
@@ -6691,7 +6734,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
VT == MVT::v16i16 || VT == MVT::v32i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
@@ -6699,18 +6742,18 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- EVT NewVT = NewOp.getValueType();
+ MVT NewVT = NewOp.getValueType().getSimpleVT();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -6725,7 +6768,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -6816,7 +6859,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6855,7 +6898,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (isShift) {
// No better options. Use a vshldq / vsrldq.
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
@@ -6926,7 +6969,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// nodes, and remove one by one until they don't return Op anymore.
if (isPALIGNRMask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
@@ -7035,13 +7078,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
- if (!Op.getOperand(0).getValueType().is128BitVector())
+ if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7106,7 +7147,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
+ MVT VecVT = Vec.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
@@ -7133,7 +7174,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
@@ -7146,7 +7187,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
- EVT EltVT = MVT::i32;
+ MVT EltVT = MVT::i32;
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
@@ -7161,7 +7202,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7180,7 +7221,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Note that if the lower 64 bits of the result of the UNPCKHPD are then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- EVT VVT = Op.getOperand(0).getValueType();
+ MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7190,11 +7231,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return SDValue();
}
-SDValue
-X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7247,8 +7286,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
@@ -7296,7 +7335,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT OpVT = Op.getValueType();
+ MVT OpVT = Op.getValueType().getSimpleVT();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
@@ -7511,8 +7550,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
- int64_t Offset,
- SelectionDAG &DAG) const {
+ int64_t Offset, SelectionDAG &DAG) const {
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
@@ -7732,7 +7770,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ getTargetMachine().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -8015,9 +8053,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
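
The two constants above are the doubles 2^52 and 2^84: the lowering packs the low and high 32-bit halves of the integer into the mantissas of values biased by those powers of two, then removes the biases. A scalar sketch of the same trick (assumes C++17 hexadecimal float literals; it is not the exact DAG sequence, which operates on a <2 x f64> and subtracts both biases at once):

#include <cstdint>
#include <cstring>

double u64ToDoubleViaBias(uint64_t X) {
  uint64_t LoBits = 0x4330000000000000ULL | (X & 0xffffffffULL); // 2^52 + lo32
  uint64_t HiBits = 0x4530000000000000ULL | (X >> 32);           // 2^84 + hi32*2^32
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  // Both subtractions are exact; only the final add rounds, as a real
  // uint64 -> double conversion would.
  return (Hi - 0x1p84) + (Lo - 0x1p52);
}
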
@@ -8111,7 +8151,8 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, SVT.getVectorNumElements());
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
@@ -8204,8 +8245,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
-std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
+std::pair<SDValue,SDValue>
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -8299,9 +8341,9 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
- EVT VT = Op->getValueType(0);
+ MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
- EVT InVT = In.getValueType();
+ MVT InVT = In.getValueType().getSimpleVT();
DebugLoc dl = Op->getDebugLoc();
// Optimize vectors in AVX mode:
@@ -8330,7 +8372,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
- EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ MVT HVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
@@ -8352,9 +8394,9 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -8382,11 +8424,11 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
}
-SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
@@ -8501,9 +8543,10 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v8i16)
- return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ MVT VT = Op.getValueType().getSimpleVT();
+ if (VT.isVector()) {
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), VT,
DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
MVT::v8i32, Op.getOperand(0)));
return SDValue();
@@ -8542,12 +8585,11 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
return FIST;
}
-SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
SDValue In = Op.getOperand(0);
- EVT SVT = In.getValueType();
+ MVT SVT = In.getValueType().getSimpleVT();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -8559,8 +8601,8 @@ SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8568,9 +8610,11 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, ~(1ULL << 63))));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
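
The constant splatted here is simply an all-ones pattern with the sign bit cleared, so the andps/andpd against it implements fabs bitwise. A scalar model of the f64 case, not the DAG code:

#include <cstdint>
#include <cstring>

double fabsViaMask(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= ~(1ULL << 63);              // clear only the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;                           // also correct for +-0, infinities and NaNs
}
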
@@ -8591,8 +8635,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT EltVT = VT;
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
@@ -8600,9 +8644,11 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
}
Constant *C;
if (EltVT == MVT::f64)
- C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ APInt(64, 1ULL << 63)));
else
- C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
+ APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
@@ -8626,8 +8672,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT SrcVT = Op1.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
+ MVT SrcVT = Op1.getValueType().getSimpleVT();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -8646,13 +8692,15 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// First get the sign bit of second operand.
SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8675,13 +8723,17 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
+ const fltSemantics &Sem = APFloat::IEEEdouble;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(64, 0))));
} else {
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
- CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
+ const fltSemantics &Sem = APFloat::IEEEsingle;
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem,
+ APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
+ CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
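
Written out bitwise, the two constant pools above implement copysign as "the sign bit of the second operand OR'd into the sign-cleared first operand". A scalar sketch under that reading, covering only the f64 same-width case (the code above additionally handles f32 and a narrower source operand):

#include <cstdint>
#include <cstring>

double copysignViaMasks(double Mag, double Sgn) {
  uint64_t MagBits, SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof(MagBits));
  std::memcpy(&SgnBits, &Sgn, sizeof(SgnBits));
  uint64_t SignBit = SgnBits & (1ULL << 63);   // first constant pool: isolate the sign
  uint64_t Rest    = MagBits & ~(1ULL << 63);  // second constant pool: drop Mag's sign
  uint64_t OutBits = Rest | SignBit;
  double Result;
  std::memcpy(&Result, &OutBits, sizeof(Result));
  return Result;
}
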
@@ -8697,7 +8749,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -8707,7 +8759,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
+ SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -9139,65 +9192,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
return SDValue();
}
-SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-
- if (Op.getValueType().isVector()) return LowerVSETCC(Op, DAG);
-
- assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode())
- return NewSetCC;
- }
-
- // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
- // these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-
- // If the input is a setcc, then reuse the input setcc or use a new one with
- // the inverted condition.
- if (Op0.getOpcode() == X86ISD::SETCC) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (!Invert) return Op0;
-
- CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
- }
- }
-
- bool isFP = Op1.getValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
- if (X86CC == X86::COND_INVALID)
- return SDValue();
-
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
-}
-
// Lower256IntVSETCC - Break a 256-bit integer VSETCC into two new 128-bit ones,
// and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9217,26 +9215,27 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1, CC),
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
#ifndef NDEBUG
- EVT EltVT = Op0.getValueType().getVectorElementType();
+ MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
@@ -9377,6 +9376,63 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+
+ MVT VT = Op.getValueType().getSimpleVT();
+
+ if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
+
+ assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
+ if (NewSetCC.getNode())
+ return NewSetCC;
+ }
+
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+ }
+
+ bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+ if (X86CC == X86::COND_INVALID)
+ return SDValue();
+
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+}
+
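
The three patterns listed in the comment above all test a single bit, which is what makes BT profitable. A source-level example of the first form; the mapping to a `bt` instruction is the intended outcome of this lowering, not a guarantee at the C++ level:

#include <cstdint>

// (X & (1 << N)) == 0  -- the backend aims to select `bt` plus a flags
// consumer instead of materializing the shifted mask. N < 32 assumed.
bool bitIsClear(uint32_t X, uint32_t N) {
  return (X & (1u << N)) == 0;
}
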
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -9499,7 +9555,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getValueType().getSimpleVT();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -9610,9 +9666,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
- EVT VT = Op->getValueType(0);
+ MVT VT = Op->getValueType(0).getSimpleVT();
SDValue In = Op->getOperand(0);
- EVT InVT = In.getValueType();
+ MVT InVT = In.getValueType().getSimpleVT();
DebugLoc dl = Op->getDebugLoc();
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
@@ -9646,7 +9702,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
@@ -10155,7 +10211,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
-// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
@@ -11377,13 +11433,55 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
+SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT EltTy = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Lower sdiv X, pow2-const.
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+ if (!C)
+ return SDValue();
+
+ APInt SplatValue, SplatUndef;
+ unsigned MinSplatBits;
+ bool HasAnyUndefs;
+ if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs))
+ return SDValue();
+
+ if ((SplatValue != 0) &&
+ (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
+ unsigned lg2 = SplatValue.countTrailingZeros();
+ // Splat the sign bit.
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
+ SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ // Add (N0 < 0) ? abs(divisor) - 1 : 0, so the following shift rounds toward zero.
+ SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
+ SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
+ SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
+ SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (SplatValue.isNonNegative())
+ return SRA;
+
+ SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
+ return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ }
+ return SDValue();
+}
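
For intuition, here is a minimal scalar sketch of the arithmetic the vector sequence above performs for a power-of-two divisor such as 8 (lg2 = 3), assuming arithmetic right shift of negative ints as on x86; helper and variable names are illustrative:

static inline int sdivByEight(int N0) {
  int Sgn  = N0 >> 31;                         // VSRAI: splat the sign bit
  int Bias = (int)((unsigned)Sgn >> (32 - 3)); // VSRLI: (N0 < 0) ? 7 : 0
  return (N0 + Bias) >> 3;                     // ADD, then VSRAI by lg2
}
// For a negative divisor (e.g. -8) the shifted result is then negated,
// which is what the final ISD::SUB from a zero vector does.
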
+
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- LLVMContext *Context = DAG.getContext();
if (!Subtarget->hasSSE2())
return SDValue();
@@ -11500,17 +11598,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
- DAG.getConstant(23, MVT::i32));
-
- const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
- Constant *C = ConstantDataVector::get(*Context, CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
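
The v4i32 path above relies on building 2^amt directly in a float's exponent field: shifting the amount into bits [30:23] and adding the bias of 1.0f (0x3f800000) yields the float 2^amt, which after conversion back to integer turns the variable shift into a multiply. A scalar sketch of the same idea, with an illustrative function name:

#include <cstdint>
#include <cstring>

static inline uint32_t shlViaFloat(uint32_t X, uint32_t Amt) { // Amt in [0, 31]
  uint32_t Bits = (Amt << 23) + 0x3f800000u;  // exponent of 1.0f plus Amt
  float Pow2;
  std::memcpy(&Pow2, &Bits, sizeof(Pow2));    // bitcast: Pow2 is now 2^Amt
  return X * (uint32_t)Pow2;                  // convert back and multiply
}
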
@@ -11519,8 +11609,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
- DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
@@ -11952,6 +12041,43 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
+
+ // For MacOSX, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two XMM registers.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Only optimize x86_64 for now. i386 is a bit messy. For f32,
+ // the small struct {f32, f32} is returned in (eax, edx). For f64,
+ // the results are returned via SRet in memory.
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
+ false, false, false, false, 0,
+ CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
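
The call built above corresponds to a libcall with roughly the following shape on Darwin x86-64, where the {sin, cos} pair comes back in two XMM registers; the struct and field names here are illustrative and simply mirror the RetTy constructed above:

struct SinCosRet { double Sin, Cos; };            // mirrors StructType::get(ArgTy, ArgTy)
extern "C" SinCosRet __sincos_stret(double Arg);  // f32 variant: __sincosf_stret
// Usage: SinCosRet R = __sincos_stret(X);  -- one call instead of sin(X) and cos(X)
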
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -11981,13 +12107,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
- case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
- case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
@@ -12033,6 +12159,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
}
}
@@ -12372,7 +12500,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
- case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
@@ -12783,7 +12911,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -13015,7 +13143,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
MachineFunction::iterator I = MBB;
++I;
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
"Unexpected number of operands");
assert(MI->hasOneMemOperand() &&
@@ -15246,13 +15374,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1) {
- SmallVector<SDValue, 32> V(VT.getVectorNumElements(),
- DAG.getConstant(-A, VT.getScalarType()));
+ if (CondRHS.getConstantOperandVal(0) == -A-1)
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- V.data(), V.size()));
- }
+ DAG.getConstant(-A, VT));
}
// Another special case: If C was a sign bit, the sub has been
@@ -15552,7 +15676,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
ConstantSDNode *CmpAgainst = 0;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
- dyn_cast<ConstantSDNode>(Cond.getOperand(0)) == 0) {
+ !isa<ConstantSDNode>(Cond.getOperand(0))) {
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
@@ -15832,8 +15956,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::f32 || VT == MVT::f64) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end();
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
@@ -15920,7 +16043,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
- if (VT.getSizeInBits() != 256)
+ if (!VT.is256BitVector())
return SDValue();
assert((N->getOpcode() == ISD::ANY_EXTEND ||
@@ -15929,7 +16052,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Narrow = N->getOperand(0);
EVT NarrowVT = Narrow->getValueType(0);
- if (NarrowVT.getSizeInBits() != 128)
+ if (!NarrowVT.is128BitVector())
return SDValue();
if (Narrow->getOpcode() != ISD::XOR &&
@@ -16125,11 +16248,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
DebugLoc DL = N->getDebugLoc();
- // We are going to replace the AND, OR, NAND with either BLEND
- // or PSIGN, which only look at the MSB. The VSRAI instruction
- // does not affect the highest bit, so we can get rid of it.
- Mask = Mask.getOperand(0);
-
// Now we know we at least have a plendvb with the mask val. See if
// we can form a psignb/w/d.
// psign = x.type == y.type == mask.type && y = sub(0, x);
@@ -16138,7 +16256,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
- Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask);
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// PBLENDVB only available on SSE 4.1
@@ -16296,8 +16414,42 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
EVT MemVT = Ld->getMemoryVT();
DebugLoc dl = Ld->getDebugLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned RegSz = RegVT.getSizeInBits();
ISD::LoadExtType Ext = Ld->getExtensionType();
+ unsigned Alignment = Ld->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
+
+ // On Sandy Bridge, unaligned 256-bit loads are inefficient.
+ if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
+ !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ unsigned NumElems = RegVT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ NumElems/2);
+ SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Alignment);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
+ Ld->getPointerInfo(), Ld->isVolatile(),
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ std::max(Alignment/2U, 1U));
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load1.getValue(1),
+ Load2.getValue(1));
+
+ SDValue NewVec = DAG.getUNDEF(RegVT);
+ NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
+ NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
+ return DCI.CombineTo(N, NewVec, TF, true);
+ }
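
At the intrinsic level, the split above roughly replaces one unaligned 256-bit load with two 128-bit loads that are recombined, which Sandy Bridge executes more efficiently. A sketch assuming AVX, with an illustrative function name:

#include <immintrin.h>

static inline __m256 loadu256AsTwoHalves(const float *P) {
  __m128 Lo = _mm_loadu_ps(P);      // first 128 bits
  __m128 Hi = _mm_loadu_ps(P + 4);  // second 128 bits
  return _mm256_insertf128_ps(_mm256_castps128_ps256(Lo), Hi, 1);
}
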
// If this is a vector EXT Load then attempt to optimize it using a
// shuffle. If SSSE3 is not available we may emit an illegal shuffle but the
@@ -16312,7 +16464,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
- unsigned RegSz = RegVT.getSizeInBits();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
@@ -16356,8 +16507,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Represent the data using the same element type that is stored in
// memory. In practice, we ''widen'' MemVT.
- EVT WideVecVT =
- EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
+ EVT WideVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize/MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
@@ -16426,10 +16577,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Build the arithmetic shift.
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
- SmallVector<SDValue, 8> C(NumElems,
- DAG.getConstant(Amt, RegVT.getScalarType()));
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size());
- Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV);
+ Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
+ DAG.getConstant(Amt, RegVT));
return DCI.CombineTo(N, Shuff, TF, true);
}
@@ -16462,16 +16611,21 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DebugLoc dl = St->getDebugLoc();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned Alignment = St->getAlignment();
+ bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
// If we are saving a concatenation of two XMM registers, perform two stores.
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
- StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
- StoredVal.getNumOperands() == 2) {
- SDValue Value0 = StoredVal.getOperand(0);
- SDValue Value1 = StoredVal.getOperand(1);
+ StVT == VT && !IsAligned) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+
+ SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
+ SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
@@ -16479,10 +16633,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), St->getAlignment());
+ St->isNonTemporal(),
+ std::max(Alignment/2U, 1U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
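
The store combine mirrors the load split: one unaligned 256-bit store becomes two 128-bit stores of the extracted halves. An intrinsic-level sketch assuming AVX, with an illustrative function name:

#include <immintrin.h>

static inline void storeu256AsTwoHalves(float *P, __m256 V) {
  _mm_storeu_ps(P,     _mm256_castps256_ps128(V));   // low 128 bits
  _mm_storeu_ps(P + 4, _mm256_extractf128_ps(V, 1)); // high 128 bits
}
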
@@ -16917,6 +17072,41 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+ DebugLoc dl = N->getDebugLoc();
+
+ // SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE and
+ // AVX2, since there is no sign-extended shift right operation on a vector
+ // with 64-bit elements.
+ // (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
+ //   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
+ if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)) {
+ SDValue N00 = N0.getOperand(0);
+
+ // An extending load already has a better lowering on AVX2 (it can be
+ // matched as an X86ISD::VSEXT node), so leave it alone.
+ if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+ if (!ISD::isNormalLoad(N00.getNode()))
+ return SDValue();
+
+ if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+ SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+ N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+ }
+ }
+ return SDValue();
+}
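
Per lane, the combine above performs the sign-extension in two cheap steps, since there is no 64-bit vector arithmetic shift right. A scalar sketch of the equivalent computation for a 16-bit ExtraVT element, with illustrative names:

#include <cstdint>

static inline int64_t sextInReg16To64(int32_t X) {
  int32_t InReg = (int16_t)X;  // sext_in_reg within the 32-bit lane
  return (int64_t)InReg;       // then sign-extend v4i32 -> v4i64
}
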
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -17002,7 +17192,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
}
}
- if (VT.isVector() && VT.getSizeInBits() == 256) {
+ if (VT.is256BitVector()) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
@@ -17037,8 +17227,8 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
+// Helper function of PerformSETCCCombine. It materializes "setb reg"
+// as "sbb reg,reg", since that can be extended without a zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, MVT::i8,
@@ -17056,13 +17246,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(1);
if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
+ // Try to convert COND_A into COND_B in an attempt to facilitate
// materializing "setb reg".
//
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(),
@@ -17270,7 +17460,8 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0), In.getOperand(0));
+ return DAG.getNode(X86ISD::VZEXT, N->getDebugLoc(), N->getValueType(0),
+ In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
@@ -17308,13 +17499,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
- case X86ISD::PALIGN:
+ case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -17497,7 +17689,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17515,7 +17707,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
+ array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
@@ -17995,7 +18187,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// really want an 8-bit or 32-bit register, map to the appropriate register
// class and return the appropriate register.
if (Res.second == &X86::GR16RegClass) {
- if (VT == MVT::i8) {
+ if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -18008,7 +18200,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
- } else if (VT == MVT::i32) {
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
@@ -18025,7 +18217,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
- } else if (VT == MVT::i64) {
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 16ce364..958ceb0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -234,11 +234,8 @@ namespace llvm {
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return.
- /// operand #0 chain
- /// operand #1 callee (register or absolute)
- /// operand #2 stack adjustment
- /// operand #3 optional in flag
+ /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// the list of operands.
TC_RETURN,
// VZEXT_MOVL - Vector move low and zero extend.
@@ -294,7 +291,7 @@ namespace llvm {
TESTP,
// Several flavors of instructions with vector shuffle behaviors.
- PALIGN,
+ PALIGNR,
PSHUFD,
PSHUFHW,
PSHUFLW,
@@ -794,9 +791,7 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -811,20 +806,18 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
@@ -841,8 +834,9 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
@@ -851,7 +845,7 @@ namespace llvm {
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 54b91c3..bb362f5 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -87,12 +87,10 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
- "prefetch $addr", []>;
+ "prefetch\t$addr", []>;
-// FIXME: Diassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
- "prefetchw $addr", []>;
+ "prefetchw\t$addr", []>;
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 0eecd5f..d86a406 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -29,11 +29,11 @@ def LEA32r : I<0x8D, MRMSrcMem,
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>,
Requires<[In64BitMode]>;
let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
@@ -1256,3 +1256,49 @@ let Predicates = [HasBMI2] in {
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// ADCX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize;
+
+ def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+
+ let mayLoad = 1 in {
+ def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adcx{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize;
+
+ def ADCX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adcx{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ADOX Instruction
+//
+let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS;
+
+ def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+
+ let mayLoad = 1 in {
+ def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "adox{l}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS;
+
+ def ADOX64rm : I<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "adox{q}\t{$src, $dst|$dst, $src}",
+ [], IIC_BIN_MEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ }
+}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 2a26a22..734e598 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -513,15 +513,19 @@ def CMOV_RFP80 : I<0, Pseudo,
multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
+ let Defs = [EFLAGS, AL] in
def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
(ins i8mem:$ptr, GR8:$val),
!strconcat(mnemonic, "8 PSEUDO!"), []>;
+ let Defs = [EFLAGS, AX] in
def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
(ins i16mem:$ptr, GR16:$val),
!strconcat(mnemonic, "16 PSEUDO!"), []>;
+ let Defs = [EFLAGS, EAX] in
def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
(ins i32mem:$ptr, GR32:$val),
!strconcat(mnemonic, "32 PSEUDO!"), []>;
+ let Defs = [EFLAGS, RAX] in
def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
(ins i64mem:$ptr, GR64:$val),
!strconcat(mnemonic, "64 PSEUDO!"), []>;
@@ -559,7 +563,8 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+ let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
+ mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
!strconcat(mnemonic, "6432 PSEUDO!"), []>;
@@ -1076,12 +1081,14 @@ def : Pat<(X86cmp GR64:$src1, 0),
// inverted.
multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
Instruction Inst64> {
- def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
- (Inst16 GR16:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
- (Inst32 GR32:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
- (Inst64 GR64:$src2, addr:$src1)>;
+ let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+ }
}
defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index f48f133..7759a8a 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm<opc213,
- !strconcat(OpcodeStr, !strconcat("213", PackTy)),
+ !strconcat(OpcodeStr, "213", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
- !strconcat(OpcodeStr, !strconcat("132", PackTy)),
+ !strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
defm r231 : fma3p_rm<opc231,
- !strconcat(OpcodeStr, !strconcat("231", PackTy)),
+ !strconcat(OpcodeStr, "231", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
} // neverHasSideEffects = 1
}
@@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
ComplexPattern mem_cpat> {
let neverHasSideEffects = 1 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)),
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)),
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpVT, mem_frag>;
}
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpVT, mem_frag, OpNode>,
- fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)),
+ fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, mem_cpat, Int, RC>;
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 6151d5c..44e574d 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -570,7 +570,7 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin>, TA,
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7025e93..2a72fb6 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -160,7 +160,7 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
-def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9ecf5e2..d989ec7 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -525,6 +525,13 @@ def lea64_32mem : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
+// Memory operands that use 64-bit pointers in both ILP32 and LP64.
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -535,6 +542,12 @@ def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
+// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
+def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or,
+ frameindex, X86WrapperRIP],
+ []>;
+
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
@@ -590,6 +603,7 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
+def HasADX : Predicate<"Subtarget->hasADX()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -856,16 +870,14 @@ let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
Requires<[In64BitMode]>;
-
-
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
mayLoad=1, neverHasSideEffects=1 in {
-def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>,
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
mayStore=1, neverHasSideEffects=1 in {
-def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>,
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3175324..0979752 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -436,93 +436,69 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
- SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set VR128:$dst, (vt (OpNode VR128:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string base_opc,
+ string asm_opr> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>;
-// Loading from memory automatically zeroing upper bits.
-class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr> :
- SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
-
-// AVX
-def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
- VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
- VEX_LIG;
-
-// For the disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
- def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, VEX_4V, VEX_LIG;
- def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, VEX_4V, VEX_LIG;
+ // For the disassembler
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
+ (ins VR128:$src1, RC:$src2),
+ !strconcat(base_opc, asm_opr),
+ [], IIC_SSE_MOV_S_RR>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
- VEX_LIG;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
- VEX_LIG;
-}
+multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, string OpcodeStr> {
+ // AVX
+ defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+ VEX_4V, VEX_LIG;
-def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XS, VEX, VEX_LIG;
-def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- XD, VEX, VEX_LIG;
+ def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ VEX, VEX_LIG;
+ // SSE1 & 2
+ let Constraints = "$src1 = $dst" in {
+ defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+ "\t{$src2, $dst|$dst, $src2}">;
+ }
-// SSE1 & 2
-let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
- "movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
- "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+ def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+}
- // For the disassembler
- let isCodeGenOnly = 1, hasSideEffects = 0 in {
- def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XS;
- def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", [],
- IIC_SSE_MOV_S_RR>, XD;
- }
+// Loading from memory automatically zeroing upper bits.
+multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>;
}
+defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
+defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD;
+
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
let AddedComplexity = 20 in
- def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+ defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}
-def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
-
// Patterns
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
@@ -1110,34 +1086,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
-multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
- SDNode psnode, SDNode pdnode, string base_opc,
- string asm_opr, InstrItinClass itin> {
+multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, string asm_opr,
+ InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
- [(set RC:$dst,
- (psnode RC:$src1,
+ [(set VR128:$dst,
+ (psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
itin, SSEPackedSingle>, TB;
def PDrm : PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (pdnode RC:$src1,
+ [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
itin, SSEPackedDouble>, TB, OpSize;
+
}
-let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
+ string base_opc, InstrItinClass itin> {
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ itin>, VEX_4V;
+
+let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ "\t{$src2, $dst|$dst, $src2}",
+ itin>;
}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+
+let AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
+ IIC_SSE_MOV_LH>;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1235,14 +1218,8 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- IIC_SSE_MOV_LH>, VEX_4V;
-}
-let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
- "\t{$src2, $dst|$dst, $src2}",
- IIC_SSE_MOV_LH>;
+ defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
+ IIC_SSE_MOV_LH>;
}
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -3012,18 +2989,18 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX], hasSideEffects = 0 in {
def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR32:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
}
@@ -3054,18 +3031,18 @@ multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX], hasSideEffects = 0 in {
def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR32:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
}
@@ -3100,22 +3077,22 @@ multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
let Predicates = [HasAVX] in {
def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
itins.rr>, VEX;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
itins.rm>, VEX;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
itins.rr>, VEX, VEX_L;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L;
@@ -3135,23 +3112,23 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
OpndItins itins> {
let Predicates = [HasAVX] in {
def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
itins.rr>, VEX;
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
itins.rm>, VEX;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int VR256:$src))],
itins.rr>, VEX, VEX_L;
def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
(ins f256mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L;
@@ -3173,18 +3150,18 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX], hasSideEffects = 0 in {
def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR64:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in {
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1,f64mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, VEX_4V, VEX_LIG;
}
@@ -3211,22 +3188,22 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, OpndItins itins> {
let Predicates = [HasAVX] in {
def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
itins.rr>, VEX;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
itins.rm>, VEX;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
itins.rr>, VEX, VEX_L;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
itins.rm>, VEX, VEX_L;
@@ -3985,14 +3962,14 @@ multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
let Predicates = [HasAVX] in {
def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF>, VEX;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
@@ -4002,14 +3979,14 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, i8imm:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF>, VEX, VEX_L;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
@@ -5190,7 +5167,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -5210,7 +5187,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
}
}
-multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -5227,42 +5204,42 @@ multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX] in
- defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
- defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
- defm PALIGN : ssse3_palign<"palignr">;
+ defm PALIGN : ssse3_palignr<"palignr">;
let Predicates = [HasAVX2] in {
-def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
-def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
@@ -5590,6 +5567,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
let Predicates = [HasAVX2] in {
+ def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
+ def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
+
+ def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
+
+ def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
+
+ def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
+ (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
+ (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
+ def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
+ (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+
def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
(VPMOVSXWDYrm addr:$src)>;
def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
@@ -5628,6 +5629,15 @@ let Predicates = [HasAVX] in {
}
let Predicates = [UseSSE41] in {
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
@@ -5727,6 +5737,15 @@ let Predicates = [HasAVX] in {
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
(VPMOVZXDQrm addr:$src)>;
+ def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
+ def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
+
+ def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
+
+ def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+
def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
(scalar_to_vector (loadi64 addr:$src))))))),
(VPMOVSXWDrm addr:$src)>;
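The X86vsext patterns added in this file select the PMOVSX family: each instruction sign-extends the low lanes of an xmm input to wider lanes, and the 256-bit-source variants first take the low 128 bits (EXTRACT_SUBREG ... sub_xmm) because the instruction only reads an xmm register. A scalar sketch of PMOVSXBW's behaviour (illustrative, not from the patch):

    #include <cstdint>

    // Scalar model of PMOVSXBW: sign-extend the low 8 bytes of the 16-byte
    // source to 8 16-bit lanes; the upper 8 source bytes are ignored.
    static void pmovsxbw(const int8_t Src[16], int16_t Dst[8]) {
      for (unsigned i = 0; i != 8; ++i)
        Dst[i] = static_cast<int16_t>(Src[i]); // integer promotion sign-extends
    }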
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index ea716bf..3caa1b5 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -352,11 +352,11 @@ def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
// Descriptor-table support instructions
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt\t$dst", [], IIC_SGDT>, TB;
def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>;
def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
"sidt\t$dst", []>, TB;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
@@ -374,11 +374,11 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>;
def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
"lgdt\t$src", [], IIC_LGDT>, TB;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
+ "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>;
def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
"lidt\t$src", [], IIC_LIDT>, TB;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index cca391f..44d8cce 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -79,7 +79,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
# define CFI(x)
#endif
-// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
#if defined(X86_64_JIT)
@@ -131,12 +131,12 @@ extern "C" {
"subq $32, %rsp\n"
"movq %rbp, %rcx\n" // Pass prev frame and return address
"movq 8(%rbp), %rdx\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addq $32, %rsp\n"
#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
@@ -213,7 +213,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"movl %ebp, %esp\n" // Restore ESP
CFI(".cfi_def_cfa_register %esp\n")
"subl $12, %esp\n"
@@ -269,7 +269,7 @@ extern "C" {
"movl 4(%ebp), %eax\n" // Pass prev frame and return address
"movl %eax, 4(%esp)\n"
"movl %ebp, (%esp)\n"
- "call " ASMPREFIX "X86CompilationCallback2\n"
+ "call " ASMPREFIX "LLVMX86CompilationCallback2\n"
"addl $16, %esp\n"
"movaps 48(%esp), %xmm3\n"
CFI(".cfi_restore %xmm3\n")
@@ -300,10 +300,7 @@ extern "C" {
SIZE(X86CompilationCallback_SSE)
);
# else
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
- static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+ void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
_declspec(naked) void X86CompilationCallback(void) {
__asm {
@@ -317,7 +314,7 @@ extern "C" {
mov eax, dword ptr [ebp+4]
mov dword ptr [esp+4], eax
mov dword ptr [esp], ebp
- call X86CompilationCallback2
+ call LLVMX86CompilationCallback2
mov esp, ebp
sub esp, 12
pop ecx
@@ -337,20 +334,17 @@ extern "C" {
#endif
}
-/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// This is the target-specific function invoked by the
/// function stub when we did not know the real target of a call. This function
/// must locate the start of the stub or call site and pass it into the JIT
/// compiler function.
extern "C" {
-#if !(defined (X86_64_JIT) && defined(_MSC_VER))
- // the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
- // no support for inline assembly
-static
-#endif
-void LLVM_ATTRIBUTE_USED
-X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr,
+ intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
+ // We are reading raw stack data here. Tell MemorySanitizer that it is
+ // sufficiently initialized.
+ __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc));
assert(*RetAddrLoc == RetAddr &&
"Could not find return address on the stack!");
@@ -517,7 +511,7 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
// This used to use 0xCD, but that value is used by JITMemoryManager to
// initialize the buffer with garbage, which means it may follow a
- // noreturn function call, confusing X86CompilationCallback2. PR 4929.
+ // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929.
JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub!
return Result;
}
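The __msan_unpoison call above is needed because the callback inspects a raw return-address slot that MemorySanitizer never saw being written; how the declaration reaches this file is outside the shown hunks. The general pattern, as a standalone sketch (UNPOISON and readReturnAddressSlot are illustrative names, not LLVM API):

    #include <cstddef>
    // Compile the unpoison call only when building under MemorySanitizer.
    #if defined(__has_feature)
    # if __has_feature(memory_sanitizer)
    #  include <sanitizer/msan_interface.h>
    #  define UNPOISON(Addr, Size) __msan_unpoison(Addr, Size)
    # endif
    #endif
    #ifndef UNPOISON
    # define UNPOISON(Addr, Size) ((void)(Addr), (void)(Size))
    #endif

    // Memory the instrumented code did not write (here, a stack slot filled
    // by JIT'd code) must be marked initialized before the first read.
    static long readReturnAddressSlot(long *StackPtr) {
      long *Slot = &StackPtr[1];
      UNPOISON(Slot, sizeof *Slot);
      return *Slot;
    }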
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 5a1e1b8..3af1b3e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -239,7 +239,8 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
if (!MI->getOperand(OpNo+i).isReg()) continue;
unsigned Reg = MI->getOperand(OpNo+i).getReg();
- if (Reg == 0) continue;
+ // LEAs can use RIP-relative addressing, and RIP has no sub/super register.
+ if (Reg == 0 || Reg == X86::RIP) continue;
MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
}
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index c22872f..83e75ea 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -33,6 +33,19 @@ using namespace llvm;
STATISTIC(NumBBsPadded, "Number of basic blocks padded");
namespace {
+ struct VisitedBBInfo {
+ // HasReturn - Whether the BB contains a return instruction
+ bool HasReturn;
+
+ // Cycles - Number of cycles until return if HasReturn is true, otherwise
+ // number of cycles until end of the BB
+ unsigned int Cycles;
+
+ VisitedBBInfo() : HasReturn(false), Cycles(0) {}
+ VisitedBBInfo(bool HasReturn, unsigned int Cycles)
+ : HasReturn(HasReturn), Cycles(Cycles) {}
+ };
+
struct PadShortFunc : public MachineFunctionPass {
static char ID;
PadShortFunc() : MachineFunctionPass(ID)
@@ -49,16 +62,21 @@ namespace {
unsigned int Cycles = 0);
bool cyclesUntilReturn(MachineBasicBlock *MBB,
- unsigned int &Cycles,
- MachineBasicBlock::iterator *Location = 0);
+ unsigned int &Cycles);
void addPadding(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &MBBI,
unsigned int NOOPsToAdd);
const unsigned int Threshold;
+
+ // ReturnBBs - Maps basic blocks that return to the minimum number of
+ // cycles until the return, starting from the entry block.
DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
+ // VisitedBBs - Cache of previously visited BBs.
+ DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
+
const TargetMachine *TM;
const TargetInstrInfo *TII;
};
@@ -73,25 +91,26 @@ FunctionPass *llvm::createX86PadShortFunctions() {
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// NOOP instructions before early exits.
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
- bool OptForSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
-
- if (OptForSize)
+ const AttributeSet &FnAttrs = MF.getFunction()->getAttributes();
+ if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize)) {
return false;
+ }
TM = &MF.getTarget();
TII = TM->getInstrInfo();
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
+ VisitedBBs.clear();
findReturns(MF.begin());
bool MadeChange = false;
- MachineBasicBlock::iterator ReturnLoc;
MachineBasicBlock *MBB;
unsigned int Cycles = 0;
- unsigned int BBCycles;
// Pad the identified basic blocks with NOOPs
for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
@@ -100,8 +119,16 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
Cycles = I->second;
if (Cycles < Threshold) {
- if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc))
- continue;
+ // BB ends in a return. Skip over any DBG_VALUE instructions
+ // trailing the terminator.
+ assert(MBB->size() > 0 &&
+ "Basic block should contain at least a RET but is empty");
+ MachineBasicBlock::iterator ReturnLoc = --MBB->end();
+
+ while (ReturnLoc->isDebugValue())
+ --ReturnLoc;
+ assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
+ "Basic block does not end with RET");
addPadding(MBB, ReturnLoc, Threshold - Cycles);
NumBBsPadded++;
@@ -127,18 +154,30 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
// Follow branches in BB and look for returns
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
- I != MBB->succ_end(); ++I) {
+ I != MBB->succ_end(); ++I) {
+ if (*I == MBB)
+ continue;
findReturns(*I, Cycles);
}
}
-/// cyclesUntilReturn - if the MBB has a return instruction, set Location
-/// to the instruction and return true. Return false otherwise.
+/// cyclesUntilReturn - return true if the MBB has a return instruction,
+/// and return false otherwise.
/// Cycles will be incremented by the number of cycles taken to reach the
/// return or the end of the BB, whichever occurs first.
bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
- unsigned int &Cycles,
- MachineBasicBlock::iterator *Location) {
+ unsigned int &Cycles) {
+ // Return cached result if BB was previously visited
+ DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
+ = VisitedBBs.find(MBB);
+ if (it != VisitedBBs.end()) {
+ VisitedBBInfo BBInfo = it->second;
+ Cycles += BBInfo.Cycles;
+ return BBInfo.HasReturn;
+ }
+
+ unsigned int CyclesToEnd = 0;
+
for (MachineBasicBlock::iterator MBBI = MBB->begin();
MBBI != MBB->end(); ++MBBI) {
MachineInstr *MI = MBBI;
@@ -146,14 +185,16 @@ bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
// functions do not count because the called function will be padded,
// if necessary.
if (MI->isReturn() && !MI->isCall()) {
- if (Location)
- *Location = MBBI;
+ VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
+ Cycles += CyclesToEnd;
return true;
}
- Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
+ CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI);
}
+ VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
+ Cycles += CyclesToEnd;
return false;
}
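The VisitedBBs cache introduced above memoizes (HasReturn, CyclesToEnd) per block so that blocks reachable from several predecessors are costed only once. The shape of that memoization, reduced to a standalone sketch (ToyBlock and VisitInfo are made-up stand-ins, not LLVM types):

    #include <map>
    #include <vector>

    struct ToyBlock {
      bool EndsInReturn;              // does the block end in a return?
      unsigned Cost;                  // summed instruction latency
      std::vector<ToyBlock *> Succs;
    };

    struct VisitInfo { bool HasReturn; unsigned Cycles; };

    // Add the block's cost to Cycles, consulting/filling the cache first.
    static bool cyclesUntilReturn(ToyBlock *BB, unsigned &Cycles,
                                  std::map<ToyBlock *, VisitInfo> &Visited) {
      std::map<ToyBlock *, VisitInfo>::iterator It = Visited.find(BB);
      if (It != Visited.end()) {
        Cycles += It->second.Cycles;
        return It->second.HasReturn;
      }
      VisitInfo Info = { BB->EndsInReturn, BB->Cost };
      Visited[BB] = Info;
      Cycles += Info.Cycles;
      return Info.HasReturn;
    }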
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 58064b8..16886e4 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,7 +50,7 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
-cl::opt<bool>
+static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -177,20 +177,21 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
case 1: // Normal GPRs except the stack pointer (for encoding reasons).
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.isTarget64BitLP64())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ if (Subtarget.isTargetWin64())
return &X86::GR64_TCW64RegClass;
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ else if (Subtarget.is64Bit())
return &X86::GR64_TCRegClass;
const Function *F = MF.getFunction();
@@ -234,38 +235,40 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool callsEHReturn = false;
- bool ghcCall = false;
- bool oclBiCall = false;
- bool hipeCall = false;
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
-
- if (MF) {
- callsEHReturn = MF->getMMI().callsEHReturn();
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
- hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false);
- }
-
- if (ghcCall || hipeCall)
+ switch (MF->getFunction()->getCallingConv()) {
+ case CallingConv::GHC:
+ case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
- if (oclBiCall) {
+
+ case CallingConv::Intel_OCL_BI: {
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (HasAVX && IsWin64)
- return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
if (HasAVX && Is64Bit)
- return CSR_64_Intel_OCL_BI_AVX_SaveList;
+ return CSR_64_Intel_OCL_BI_AVX_SaveList;
if (!HasAVX && !IsWin64 && Is64Bit)
- return CSR_64_Intel_OCL_BI_SaveList;
+ return CSR_64_Intel_OCL_BI_SaveList;
+ break;
}
+
+ case CallingConv::Cold:
+ if (Is64Bit)
+ return CSR_MostRegs_64_SaveList;
+ break;
+
+ default:
+ break;
+ }
+
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
}
- if (callsEHReturn)
+ if (CallsEHReturn)
return CSR_32EHRet_SaveList;
return CSR_32_SaveList;
}
@@ -286,6 +289,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
+ if (CC == CallingConv::Cold)
+ return CSR_MostRegs_64_RegMask;
if (IsWin64)
return CSR_Win64_RegMask;
return CSR_64_RegMask;
@@ -389,7 +394,13 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ //
+ // This is also true if the function contains MS-style inline assembly. We
+ // do this because if any stack changes occur in the inline assembly, e.g.,
+ // "pusha", then any C local variable or C argument references in the
+ // inline assembly will be wrong because the SP is not properly tracked.
+ if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) ||
+ MF.hasMSInlineAsm())
return true;
return false;
@@ -440,123 +451,16 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
- return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
- return X86::SUB32ri;
- }
-}
-
-static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
- if (is64Bit) {
- if (isInt<8>(Imm))
- return X86::ADD64ri8;
- return X86::ADD64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::ADD32ri8;
- return X86::ADD32ri;
- }
-}
-
-void X86RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
- int Opcode = I->getOpcode();
- bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reseveCallFrame) {
- // If the stack pointer can be changed after prologue, turn the
- // adjcallstackup instruction into a 'sub ESP, <amt>' and the
- // adjcallstackdown instruction into 'add ESP, <amt>'
- // TODO: consider using push / pop instead of sub + store / add
- if (Amount == 0)
- return;
-
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Opcode == TII.getCallFrameSetupOpcode()) {
- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Opcode == TII.getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
- }
- }
-
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
- }
-
- return;
- }
-
- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(CalleeAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-
- // We are not tracking the stack pointer adjustment by the callee, so make
- // sure we restore the stack pointer immediately after the call, there may
- // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
- MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->isCall())
- --I;
- MBB.insert(I, New);
- }
-}
-
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
- unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
unsigned Opc = MI.getOpcode();
@@ -572,7 +476,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
// Now add the frame object offset to the offset from EBP.
int FIOffset;
@@ -583,17 +487,18 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else
FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
- if (MI.getOperand(i+3).isImm()) {
+ if (MI.getOperand(FIOperandNum+3).isImm()) {
// Offset is a 32-bit integer.
- int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
int Offset = FIOffset + Imm;
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
"Requesting 64-bit offset in 32-bit immediate!");
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
- uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
- MI.getOperand(i+3).setOffset(Offset);
+ uint64_t Offset = FIOffset +
+ (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
+ MI.getOperand(FIOperandNum + 3).setOffset(Offset);
}
}
@@ -618,7 +523,15 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
case MVT::i8:
if (High) {
switch (Reg) {
- default: return getX86SubSuperRegister(Reg, MVT::i64, High);
+ default: return getX86SubSuperRegister(Reg, MVT::i64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -738,22 +651,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
return X86::R15D;
}
case MVT::i64:
- // For 64-bit mode if we've requested a "high" register and the
- // Q or r constraints we want one of these high registers or
- // just the register name otherwise.
- if (High) {
- switch (Reg) {
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- // Fallthrough.
- }
- }
switch (Reg) {
default: llvm_unreachable("Unexpected register");
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7932ede..b9d7b8c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,12 +117,9 @@ public:
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f..d99d085 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for inorder and nonzero for OOO processors.
+//
// The GenericModel contains no instruction itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0;
let LoadLatency = 4;
let HighLatency = 10;
+ let ILPWindow = 10;
}
include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 8710261..1e5f2d6 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel {
// OperandCycles may be used for expected latency.
let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+ let ILPWindow = 0; // Always try to hide expected latency.
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 757e8c7..f934fdd 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -202,6 +202,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
+ // ESI might be used as a base pointer; in that case we can't simply overwrite
+ // the register. Fall back to generic code.
+ const X86RegisterInfo *TRI =
+ static_cast<const X86RegisterInfo *>(DAG.getTarget().getRegisterInfo());
+ if (TRI->hasBasePointer(DAG.getMachineFunction()) &&
+ TRI->getBaseRegister() == X86::ESI)
+ return SDValue();
+
MVT AVT;
if (Align & 1)
AVT = MVT::i8;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 53c28f4..0f2c008 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -14,6 +14,8 @@
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -155,6 +157,12 @@ const char *X86Subtarget::getBZeroEntry() const {
return 0;
}
+bool X86Subtarget::hasSinCos() const {
+ return getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 9) &&
+ is64Bit();
+}
+
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
@@ -318,45 +326,23 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
- unsigned StackAlignOverride, bool is64Bit)
- : X86GenSubtargetInfo(TT, CPU, FS)
- , X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , HasRTM(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , PadShortFunctions(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
- , TargetTriple(TT)
- , In64BitMode(is64Bit) {
- // Determine default and user specified characteristics
+void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
if (CPUName.empty()) {
@@ -433,6 +419,53 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() {
+ X86SSELevel = NoMMXSSE;
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false;
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasADX = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ stackAlignment = 4;
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+}
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
+ , PICStyle(PICStyles::None)
+ , TargetTriple(TT)
+ , StackAlignOverride(StackAlignOverride)
+ , In64BitMode(is64Bit) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 080f4cf..e97da4b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -121,6 +121,9 @@ protected:
/// HasRTM - Processor has RTM instructions.
bool HasRTM;
+ /// HasADX - Processor has ADX instructions.
+ bool HasADX;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -165,11 +168,13 @@ protected:
InstrItineraryData InstrItins;
private:
+ /// StackAlignOverride - Override the stack alignment.
+ unsigned StackAlignOverride;
+
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
public:
-
/// This constructor initializes the data members to match that
/// of the specified triple.
///
@@ -194,7 +199,26 @@ public:
/// instruction.
void AutoDetectSubtargetFeatures();
- bool is64Bit() const { return In64BitMode; }
+ /// \brief Reset the features for the X86 target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+public:
+ /// Is this x86_64? (disregarding specific ABI / programming model)
+ bool is64Bit() const {
+ return In64BitMode;
+ }
+
+ /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
+ bool isTarget64BitILP32() const {
+ return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32);
+ }
+
+ /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
+ bool isTarget64BitLP64() const {
+ return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32);
+ }
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -229,6 +253,7 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasRTM() const { return HasRTM; }
+ bool hasADX() const { return HasADX; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
@@ -315,6 +340,10 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
+
+ /// This function returns true if the target has a sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
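The new is64Bit / isTarget64BitILP32 / isTarget64BitLP64 split keys off the triple's environment component: x86_64-*-gnux32 keeps the 64-bit instruction set but uses 32-bit pointers. A small usage sketch against llvm::Triple (isX32Triple is an illustrative helper, not part of the patch):

    #include "llvm/ADT/Triple.h"

    // True when the triple selects the x32 ABI (ILP32 on x86-64).
    static bool isX32Triple(const llvm::Triple &T) {
      return T.getArch() == llvm::Triple::x86_64 &&
             T.getEnvironment() == llvm::Triple::GNUX32;
    }

    // e.g. isX32Triple(llvm::Triple("x86_64-unknown-linux-gnux32")) is true.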
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 706e64a..8aa58a2 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -59,8 +59,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
- DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
- "n8:16:32:64-S128"),
+ // The x32 ABI dictates the ILP32 programming model for x64.
+ DL(getSubtargetImpl()->isTarget64BitILP32() ?
+ "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128" :
+ "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+ "n8:16:32:64-S128"),
InstrInfo(*this),
TLInfo(*this),
TSInfo(*this),
@@ -151,6 +155,7 @@ public:
}
virtual bool addInstSelector();
+ virtual bool addILPOpts();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
virtual bool addPreEmitPass();
@@ -158,12 +163,7 @@ public:
} // namespace
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
- X86PassConfig *PC = new X86PassConfig(this, PM);
-
- if (X86EarlyIfConv && Subtarget.hasCMov())
- PC->enablePass(&EarlyIfConverterID);
-
- return PC;
+ return new X86PassConfig(this, PM);
}
bool X86PassConfig::addInstSelector() {
@@ -181,6 +181,14 @@ bool X86PassConfig::addInstSelector() {
return false;
}
+bool X86PassConfig::addILPOpts() {
+ if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
+ addPass(&EarlyIfConverterID);
+ return true;
+ }
+ return false;
+}
+
bool X86PassConfig::addPreRegAlloc() {
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index b8ee319..871dacd 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -8,16 +8,12 @@
//===----------------------------------------------------------------------===//
#include "X86TargetObjectFile.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 9cc1b18..fefb479 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
using namespace llvm;
// Declare the pass initialization routine locally as target-specific passes
@@ -75,7 +76,6 @@ public:
/// \name Scalar TTI Implementations
/// @{
-
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
/// @}
@@ -84,6 +84,8 @@ public:
/// @{
virtual unsigned getNumberOfRegisters(bool Vector) const;
+ virtual unsigned getRegisterBitWidth(bool Vector) const;
+ virtual unsigned getMaximumUnrollFactor() const;
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
int Index, Type *SubTp) const;
@@ -118,45 +120,6 @@ llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) {
//
//===----------------------------------------------------------------------===//
-namespace {
-struct X86CostTblEntry {
- int ISD;
- MVT Type;
- unsigned Cost;
-};
-}
-
-static int
-FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-namespace {
-struct X86TypeConversionCostTblEntry {
- int ISD;
- MVT Dst;
- MVT Src;
- unsigned Cost;
-};
-}
-
-static int
-FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
- int ISD, MVT Dst, MVT Src) {
- for (unsigned int i = 0; i < len; ++i)
- if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst)
- return i;
-
- // Could not find an entry.
- return -1;
-}
-
-
X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
// TODO: Currently the __builtin_popcount() implementation using SSE3
@@ -166,11 +129,39 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
}
unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasSSE1())
+ return 0;
+
if (ST->is64Bit())
return 16;
return 8;
}
+unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAVX()) return 256;
+ if (ST->hasSSE1()) return 128;
+ return 0;
+ }
+
+ if (ST->is64Bit())
+ return 64;
+ return 32;
+
+}
+
+unsigned X86TTI::getMaximumUnrollFactor() const {
+ if (ST->isAtom())
+ return 1;
+
+ // Sandybridge and Haswell have multiple execution ports and pipelined
+ // vector units.
+ if (ST->hasAVX())
+ return 4;
+
+ return 2;
+}
+
unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -178,7 +169,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const X86CostTblEntry AVX1CostTable[] = {
+ static const CostTblEntry<MVT> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
@@ -192,7 +183,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
// Look for AVX1 lowering tricks.
if (ST->hasAVX()) {
- int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
+ int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), ISD,
LT.second);
if (Idx != -1)
return LT.first * AVX1CostTable[Idx].Cost;
@@ -226,7 +217,7 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
if (!SrcTy.isSimple() || !DstTy.isSimple())
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
- static const X86TypeConversionCostTblEntry AVXConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
@@ -241,11 +232,14 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
};
if (ST->hasAVX()) {
- int Idx = FindInConvertTable(AVXConversionTbl,
+ int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
array_lengthof(AVXConversionTbl),
ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
@@ -265,7 +259,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const X86CostTblEntry SSE42CostTbl[] = {
+ static const CostTblEntry<MVT> SSE42CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 1 },
{ ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
@@ -274,7 +268,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i8, 1 },
};
- static const X86CostTblEntry AVX1CostTbl[] = {
+ static const CostTblEntry<MVT> AVX1CostTbl[] = {
{ ISD::SETCC, MVT::v4f64, 1 },
{ ISD::SETCC, MVT::v8f32, 1 },
// AVX1 does not support 8-wide integer compare.
@@ -284,7 +278,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 4 },
};
- static const X86CostTblEntry AVX2CostTbl[] = {
+ static const CostTblEntry<MVT> AVX2CostTbl[] = {
{ ISD::SETCC, MVT::v4i64, 1 },
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
@@ -292,19 +286,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
};
if (ST->hasAVX2()) {
- int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
+ int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy);
if (Idx != -1)
return LT.first * AVX2CostTbl[Idx].Cost;
}
if (ST->hasAVX()) {
- int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
+ int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy);
if (Idx != -1)
return LT.first * AVX1CostTbl[Idx].Cost;
}
if (ST->hasSSE42()) {
- int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
+ int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy);
if (Idx != -1)
return LT.first * SSE42CostTbl[Idx].Cost;
}
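The FindInTable / FindInConvertTable helpers removed above now live behind the shared CostTblEntry / CostTableLookup templates pulled in from llvm/Target/CostTable.h. What such a lookup amounts to, as a generic sketch (SimpleCostEntry and simpleCostTableLookup are illustrative names, not the header's contents):

    // Linear scan over a small static table keyed on (ISD opcode, type);
    // -1 means "no entry", and the caller falls back to the generic cost.
    template <typename TypeT> struct SimpleCostEntry {
      int ISD;
      TypeT Type;
      unsigned Cost;
    };

    template <typename TypeT>
    static int simpleCostTableLookup(const SimpleCostEntry<TypeT> *Tbl,
                                     unsigned Len, int ISD, TypeT Ty) {
      for (unsigned I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Type == Ty)
          return static_cast<int>(I);
      return -1;
    }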
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index c4a5887..0f77948 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
return false;
}
+static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+ for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
+ return true;
+}
+
static bool hasYmmReg(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ return true;
if (!MO.isReg())
continue;
if (MO.isDebug())
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 094f18c..7e7d396 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -92,11 +92,19 @@ static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+
static DecodeStatus Decode2RInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus Decode2RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeR2RInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -132,6 +140,66 @@ static DecodeStatus DecodeLR2RInstruction(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus Decode3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode3RImmInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus Decode2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL3RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL2RUSBitpInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL6RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL5RInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
#include "XCoreGenDisassemblerTables.inc"
static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
@@ -157,13 +225,24 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus
Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
- unsigned Combined = fieldFromInstruction(Insn, 6, 5) +
- fieldFromInstruction(Insn, 5, 1) * 5 - 27;
- if (Combined >= 9)
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined < 27)
return MCDisassembler::Fail;
-
+ if (fieldFromInstruction(Insn, 5, 1)) {
+ if (Combined == 31)
+ return MCDisassembler::Fail;
+ Combined += 5;
+ }
+ Combined -= 27;
unsigned Op1High = Combined % 3;
unsigned Op2High = Combined / 3;
Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2);
@@ -172,14 +251,114 @@ Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) {
}
static DecodeStatus
+Decode3OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2,
+ unsigned &Op3) {
+ unsigned Combined = fieldFromInstruction(Insn, 6, 5);
+ if (Combined >= 27)
+ return MCDisassembler::Fail;
+
+ unsigned Op1High = Combined % 3;
+ unsigned Op2High = (Combined / 3) % 3;
+ unsigned Op3High = Combined / 9;
+ Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 4, 2);
+ Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 2, 2);
+ Op3 = (Op3High << 2) | fieldFromInstruction(Insn, 0, 2);
+ return MCDisassembler::Success;
+}
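Decode3OpInstruction above unpacks three 4-bit register operands from an 11-bit field: the top two bits of each operand form a base-3 digit of Combined (hence the < 27 check), and the low two bits sit in bit pairs [5:4], [3:2] and [1:0]. The inverse direction, as a sketch derived from that math (encode3R is illustrative, not part of the decoder):

    #include <cassert>
    #include <cstdint>

    // Pack three operands (each 0..11) into the low 11 bits of a short
    // XCore 3R encoding, mirroring Decode3OpInstruction in reverse.
    static uint32_t encode3R(unsigned Op1, unsigned Op2, unsigned Op3) {
      assert(Op1 < 12 && Op2 < 12 && Op3 < 12 && "operand out of range");
      unsigned Combined = (Op3 >> 2) * 9 + (Op2 >> 2) * 3 + (Op1 >> 2);
      return (Combined << 6) | ((Op1 & 3) << 4) | ((Op2 & 3) << 2) | (Op3 & 3);
    }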
+
+static DecodeStatus
+Decode2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try and decode as a 3R instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 11, 5);
+ switch (Opcode) {
+ case 0x0:
+ Inst.setOpcode(XCore::STW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x1:
+ Inst.setOpcode(XCore::LDW_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x2:
+ Inst.setOpcode(XCore::ADD_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3:
+ Inst.setOpcode(XCore::SUB_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4:
+ Inst.setOpcode(XCore::SHL_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5:
+ Inst.setOpcode(XCore::SHR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6:
+ Inst.setOpcode(XCore::EQ_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7:
+ Inst.setOpcode(XCore::AND_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8:
+ Inst.setOpcode(XCore::OR_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9:
+ Inst.setOpcode(XCore::LDW_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10:
+ Inst.setOpcode(XCore::LD16S_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11:
+ Inst.setOpcode(XCore::LD8U_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12:
+ Inst.setOpcode(XCore::ADD_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x13:
+ Inst.setOpcode(XCore::SUB_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14:
+ Inst.setOpcode(XCore::SHL_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x15:
+ Inst.setOpcode(XCore::SHR_2rus);
+ return Decode2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x16:
+ Inst.setOpcode(XCore::EQ_2rus);
+ return Decode2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x17:
+ Inst.setOpcode(XCore::TSETR_3r);
+ return Decode3RImmInstruction(Inst, Insn, Address, Decoder);
+ case 0x18:
+ Inst.setOpcode(XCore::LSS_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19:
+ Inst.setOpcode(XCore::LSU_3r);
+ return Decode3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode2RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2;
+ DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
return S;
}
@@ -188,10 +367,11 @@ DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
return S;
}
@@ -200,11 +380,12 @@ Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
return S;
}
@@ -213,10 +394,11 @@ DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(Op2));
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op2));
return S;
}
@@ -225,10 +407,11 @@ DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeBitpOperand(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
return S;
}
@@ -237,24 +420,97 @@ DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeBitpOperand(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return Decode2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeBitpOperand(Inst, Op2, Address, Decoder);
return S;
}
static DecodeStatus
+DecodeL2OpInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ // Try and decode as a L3R / L2RUS instruction.
+ unsigned Opcode = fieldFromInstruction(Insn, 16, 4) |
+ fieldFromInstruction(Insn, 27, 5) << 4;
+ switch (Opcode) {
+ case 0x0c:
+ Inst.setOpcode(XCore::STW_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x1c:
+ Inst.setOpcode(XCore::XOR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x2c:
+ Inst.setOpcode(XCore::ASHR_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x3c:
+ Inst.setOpcode(XCore::LDAWF_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x4c:
+ Inst.setOpcode(XCore::LDAWB_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x5c:
+ Inst.setOpcode(XCore::LDA16F_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x6c:
+ Inst.setOpcode(XCore::LDA16B_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x7c:
+ Inst.setOpcode(XCore::MUL_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x8c:
+ Inst.setOpcode(XCore::DIVS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x9c:
+ Inst.setOpcode(XCore::DIVU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x10c:
+ Inst.setOpcode(XCore::ST16_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x11c:
+ Inst.setOpcode(XCore::ST8_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x12c:
+ Inst.setOpcode(XCore::ASHR_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12d:
+ Inst.setOpcode(XCore::OUTPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x12e:
+ Inst.setOpcode(XCore::INPW_l2rus);
+ return DecodeL2RUSBitpInstruction(Inst, Insn, Address, Decoder);
+ case 0x13c:
+ Inst.setOpcode(XCore::LDAWF_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x14c:
+ Inst.setOpcode(XCore::LDAWB_l2rus);
+ return DecodeL2RUSInstruction(Inst, Insn, Address, Decoder);
+ case 0x15c:
+ Inst.setOpcode(XCore::CRC_l3r);
+ return DecodeL3RSrcDstInstruction(Inst, Insn, Address, Decoder);
+ case 0x18c:
+ Inst.setOpcode(XCore::REMS_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ case 0x19c:
+ Inst.setOpcode(XCore::REMU_l3r);
+ return DecodeL3RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
const void *Decoder) {
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
Op1, Op2);
- if (S == MCDisassembler::Success) {
- DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
- DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
- }
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
return S;
}
@@ -264,9 +520,212 @@ DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
unsigned Op1, Op2;
DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16),
Op1, Op2);
+ if (S != MCDisassembler::Success)
+ return DecodeL2OpInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+Decode3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode3RImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ Inst.addOperand(MCOperand::CreateImm(Op1));
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+Decode2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S = Decode3OpInstruction(Insn, Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL3RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(Op3));
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL2RUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeBitpOperand(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL6RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5, Op6;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return S;
+ S = Decode3OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5, Op6);
+ if (S != MCDisassembler::Success)
+ return S;
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op6, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL5RInstructionFail(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+  // Try to decode as an L6R instruction.
+ Inst.clear();
+ unsigned Opcode = fieldFromInstruction(Insn, 27, 5);
+ switch (Opcode) {
+ case 0x00:
+ Inst.setOpcode(XCore::LMUL_l6r);
+ return DecodeL6RInstruction(Inst, Insn, Address, Decoder);
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus
+DecodeL5RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3, Op4, Op5;
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+ S = Decode2OpInstruction(fieldFromInstruction(Insn, 16, 16), Op4, Op5);
+ if (S != MCDisassembler::Success)
+ return DecodeL5RInstructionFail(Inst, Insn, Address, Decoder);
+
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op5, Address, Decoder);
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
+ }
+ return S;
+}
+
+static DecodeStatus
+DecodeL4RSrcDstSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ unsigned Op1, Op2, Op3;
+ unsigned Op4 = fieldFromInstruction(Insn, 16, 4);
+ DecodeStatus S =
+ Decode3OpInstruction(fieldFromInstruction(Insn, 0, 16), Op1, Op2, Op3);
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ S = DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ }
+ if (S == MCDisassembler::Success) {
+ DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op4, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder);
+ DecodeGRRegsRegisterClass(Inst, Op3, Address, Decoder);
}
return S;
}
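
A note on the decode scheme the new fallback paths rely on: XCore long-form instructions are 32 bits wide and are handled as two 16-bit halves (the register operands come from the low half via Decode2OpInstruction / Decode3OpInstruction, the high half supplies further operands or opcode bits), and when the generic operand decode fails, DecodeL2OpInstructionFail reassembles a 9-bit opcode from bits 16-19 and 27-31 before re-dispatching to the matching format decoder. The standalone sketch below mirrors only that bit arithmetic; the helper is a stand-in for fieldFromInstruction, not the MC-layer API itself.

#include <cstdint>
#include <cstdio>

// Minimal stand-in for the decoder's bit-field helper: extract numBits bits
// of insn starting at startBit (bit 0 = least significant).
static uint32_t field(uint32_t insn, unsigned startBit, unsigned numBits) {
  return (insn >> startBit) & ((1u << numBits) - 1);
}

int main() {
  // A fabricated 32-bit long-form word, purely for illustration.
  uint32_t insn = 0xF82C1234;

  // Long instructions are decoded as two 16-bit halves.
  uint32_t lowHalf  = field(insn, 0, 16);   // carries the register operands
  uint32_t highHalf = field(insn, 16, 16);  // carries extra operands / opcode bits

  // Fallback path from DecodeL2OpInstructionFail: rebuild a 9-bit opcode from
  // bits 16-19 (low nibble) and bits 27-31 (high five bits).
  uint32_t opcode = field(insn, 16, 4) | (field(insn, 27, 5) << 4);

  std::printf("low half = 0x%04x, high half = 0x%04x, opcode = 0x%03x\n",
              lowHalf, highHalf, opcode);
  return 0;
}
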
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index ea77d92..0d146ba 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -171,7 +171,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
- OutStreamer.EmitZeros(4 - Size, 0);
+ OutStreamer.EmitZeros(4 - Size);
// Mark the end of the global
OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data");
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index bb9c77a..019c457 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -332,6 +332,58 @@ bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+// This function eliminates ADJCALLSTACKDOWN and ADJCALLSTACKUP pseudo
+// instructions.
+void XCoreFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+ if (!isU6 && !isImmU16(Amount)) {
+        // FIXME: could emit multiple instructions in this case.
+#ifndef NDEBUG
+ errs() << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+#endif
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
void
XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
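
The arithmetic in eliminateCallFramePseudoInstr above deserves a worked example: the outgoing-argument size is rounded up to the stack alignment, converted from bytes to words (EXTSP and LDAWSP immediates count words), and checked against the u6/lu6 immediate ranges to choose an encoding. The minimal sketch below reproduces just that computation; the 4-byte alignment and the 6-/16-bit range checks are written out as assumptions here rather than taken from the real getStackAlignment()/isImmU6/isImmU16 helpers.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed for illustration: a 4-byte stack alignment and a hypothetical
  // outgoing-argument size of 42 bytes.
  const uint64_t Align = 4;
  uint64_t Amount = 42;

  // Round up to the next alignment boundary, as the pass does.
  Amount = (Amount + Align - 1) / Align * Align;   // 42 -> 44 bytes

  // EXTSP / LDAWSP immediates are in words, so scale bytes down to words.
  assert(Amount % 4 == 0);
  Amount /= 4;                                     // 44 bytes -> 11 words

  // Range checks corresponding to the u6 (short) and lu6 (long) encodings.
  bool fitsU6  = Amount < (1u << 6);
  bool fitsU16 = Amount < (1u << 16);
  std::printf("words=%llu  u6=%d  lu6=%d\n",
              (unsigned long long)Amount, fitsU6, fitsU16);
  return 0;
}
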
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index db1bbb6..ebad62f 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -39,6 +39,10 @@ namespace llvm {
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
bool hasFP(const MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 472ce63..fbf86c5 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -211,15 +211,10 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
Ops, 4);
}
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- case Intrinsic::xcore_crc8:
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- break;
+ case XCoreISD::CRC8: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
}
case ISD::BRIND:
if (SDNode *ResNode = SelectBRIND(N))
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6e894ac..f8a9125 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -54,6 +54,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::LMUL : return "XCoreISD::LMUL";
case XCoreISD::MACCU : return "XCoreISD::MACCU";
case XCoreISD::MACCS : return "XCoreISD::MACCS";
+ case XCoreISD::CRC8 : return "XCoreISD::CRC8";
case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
default : return NULL;
@@ -152,9 +153,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
- maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
- maxStoresPerMemmove = maxStoresPerMemmoveOptSize
- = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 4;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize
+ = MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::STORE);
@@ -167,24 +171,25 @@ SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
- case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, DAG);
// FIXME: Remove these when LegalizeDAGTypes lands.
case ISD::ADD:
- case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
- case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -736,13 +741,13 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
XCoreISD::LSUB;
SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSL, RHSL, Zero);
- SDValue Lo(Carry.getNode(), 1);
+ SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Carry(Lo.getNode(), 1);
- SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
- LHSH, RHSH, Carry);
- SDValue Hi(Ignored.getNode(), 1);
+ SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Ignored(Hi.getNode(), 1);
// Merge the pieces
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
@@ -858,6 +863,23 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
}
+SDValue XCoreTargetLowering::
+LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::xcore_crc8:
+ EVT VT = Op.getValueType();
+ SDValue Data =
+ DAG.getNode(XCoreISD::CRC8, DL, DAG.getVTList(VT, VT),
+                  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ SDValue Crc(Data.getNode(), 1);
+ SDValue Results[] = { Crc, Data };
+ return DAG.getMergeValues(Results, 2, DL);
+ }
+ return SDValue();
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
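
One easy-to-miss detail in LowerINTRINSIC_WO_CHAIN above: result 0 of the XCoreISD::CRC8 node is bound to Data and result 1 to Crc, while the merged values are handed back in the order { Crc, Data }, so the result indices are deliberately swapped between the node and the intrinsic. A tiny model of that reordering follows; the arithmetic is a placeholder, since the real work is done by the CRC8_l4r machine instruction.

#include <cstdint>
#include <cstdio>
#include <utility>

// Stand-in for the two-result node: .first plays the role of result 0 (Data),
// .second the role of result 1 (Crc). The bodies are placeholders only.
static std::pair<uint32_t, uint32_t> crc8Node(uint32_t a, uint32_t b, uint32_t c) {
  uint32_t data = a ^ b;               // placeholder, not the XCore semantics
  uint32_t crc  = b ^ c;               // placeholder, not the XCore semantics
  return std::make_pair(data, crc);
}

int main() {
  std::pair<uint32_t, uint32_t> node = crc8Node(1, 2, 3);
  // The lowering merges { Crc, Data }: the intrinsic's users see result 1 of
  // the node first and result 0 second.
  uint32_t intrinsicResults[2] = { node.second, node.first };
  std::printf("crc=%u data=%u\n", intrinsicResults[0], intrinsicResults[1]);
  return 0;
}
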
@@ -1227,15 +1249,11 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Return on XCore is always a "retsp 0"
+ RetOps.push_back(DAG.getConstant(0, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1248,15 +1266,17 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// guarantee that all emitted copies are
// stuck together, avoiding something bad
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- // Return on XCore is always a "retsp 0"
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.getNode())
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32), Flag);
- else // Return Void
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- Chain, DAG.getConstant(0, MVT::i32));
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ &RetOps[0], RetOps.size());
}
//===----------------------------------------------------------------------===//
@@ -1353,13 +1373,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, VT));
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1367,7 +1387,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
- SDValue Ops [] = { Carry, Result };
+ SDValue Ops[] = { Result, Carry };
return DAG.getMergeValues(Ops, 2, dl);
}
}
@@ -1391,14 +1411,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1406,7 +1426,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
- SDValue Ops [] = { Borrow, Result };
+ SDValue Ops[] = { Result, Borrow };
return DAG.getMergeValues(Ops, 2, dl);
}
}
@@ -1432,11 +1452,15 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// If the high result is unused fold to add(a, b)
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
- SDValue Ops [] = { Lo, Lo };
+ SDValue Ops[] = { Lo, Lo };
return DAG.getMergeValues(Ops, 2, dl);
}
// Otherwise fold to ladd(a, b, 0)
- return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Result =
+ DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+ SDValue Carry(Result.getNode(), 1);
+ SDValue Ops[] = { Carry, Result };
+ return DAG.getMergeValues(Ops, 2, dl);
}
}
break;
@@ -1530,7 +1554,7 @@ void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case XCoreISD::LADD:
case XCoreISD::LSUB:
- if (Op.getResNo() == 0) {
+ if (Op.getResNo() == 1) {
// Top bits of carry / borrow are clear.
KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
KnownZero.getBitWidth() - 1);
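
Several of the hunks above follow from a single change: XCoreISD::LADD and LSUB now produce the arithmetic result as value 0 and the carry/borrow as value 1 (previously the opposite), so ExpandADDSUB, the ladd/lsub/lmul DAG combines, and computeMaskedBitsForTargetNode all switch result indices together. The sketch below models the ladd primitive on plain integers to make concrete the two facts the combines lean on: a 64-bit add is two chained 32-bit ladds, and the carry output is always 0 or 1, which is why all but its low bit can be reported as known zero. This is an illustrative model, not the DAG node's formal definition.

#include <cstdint>
#include <cstdio>

// Model of ladd: 32-bit add with carry-in, producing (sum, carry-out) in the
// new result order (index 0 = sum, index 1 = carry).
struct LAddResult { uint32_t sum; uint32_t carry; };

static LAddResult ladd(uint32_t a, uint32_t b, uint32_t carryIn) {
  uint64_t wide = (uint64_t)a + (uint64_t)b + (uint64_t)carryIn;
  LAddResult r;
  r.sum = (uint32_t)wide;
  // Carry-out is always 0 or 1, so its top 31 bits are known zero -- exactly
  // what computeMaskedBitsForTargetNode now reports for result number 1.
  r.carry = (uint32_t)(wide >> 32);
  return r;
}

int main() {
  // ExpandADDSUB: a 64-bit add becomes two chained ladds; the low-word sum is
  // result 0 of the first node and its carry (result 1) feeds the second.
  uint64_t x = 0x00000001FFFFFFFFULL, y = 0x0000000000000003ULL;
  LAddResult lo = ladd((uint32_t)x, (uint32_t)y, 0);
  LAddResult hi = ladd((uint32_t)(x >> 32), (uint32_t)(y >> 32), lo.carry);
  uint64_t result = ((uint64_t)hi.sum << 32) | lo.sum;
  std::printf("0x%016llx (expect 0x%016llx)\n",
              (unsigned long long)result, (unsigned long long)(x + y));
  return 0;
}
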
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 2874f00..6d430ef 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -63,6 +63,9 @@ namespace llvm {
// Corresponds to MACCS instruction
MACCS,
+ // Corresponds to CRC8 instruction
+ CRC8,
+
// Jumptable branch.
BR_JT,
@@ -147,6 +150,7 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 44ac45c..379cc39 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -33,44 +33,122 @@ class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
// Instruction formats
//===----------------------------------------------------------------------===//
-class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _F3R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode3RInstruction";
}
-class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+// 3R with first operand as an immediate. Used for TSETR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F3RImm<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode3RImmInstruction";
+}
+
+class _FL3R<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL3RInstruction";
}
-class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+// L3R with first operand as both a source and a destination.
+class _FL3RSrcDst<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : _FL3R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL3RSrcDstInstruction";
+}
+
+class _F2RUS<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<2, outs, ins, asmstr, pattern> {
+ let Inst{15-11} = opc;
+ let DecoderMethod = "Decode2RUSInstruction";
+}
+
+// 2RUS with bitp operand
+class _F2RUSBitp<bits<5> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _F2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RUSBitpInstruction";
}
-class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FL2RUS<bits<9> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{8-4};
+ let Inst{26-20} = 0b1111110;
+ let Inst{19-16} = opc{3-0};
+
+ let Inst{15-11} = 0b11111;
+ let DecoderMethod = "DecodeL2RUSInstruction";
+}
+
+// L2RUS with bitp operand
+class _FL2RUSBitp<bits<9> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL2RUS<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL2RUSBitpInstruction";
}
-class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<6> b;
+
+ let Inst{15-10} = opc;
+ let Inst{9-6} = a;
+ let Inst{5-0} = b;
}
-class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FLRU6<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> a;
+ bits<16> b;
+
+ let Inst{31-26} = opc;
+ let Inst{25-22} = a;
+ let Inst{21-16} = b{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = b{15-6};
}
-class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<6> a;
+
+ let Inst{15-6} = opc;
+ let Inst{5-0} = a;
}
-class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FLU6<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<16> a;
+
+ let Inst{31-22} = opc;
+ let Inst{21-16} = a{5-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{15-6};
}
-class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<2, outs, ins, asmstr, pattern> {
+ bits<10> a;
+
+ let Inst{15-10} = opc;
+ let Inst{9-0} = a;
}
-class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FLU10<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<20> a;
+
+ let Inst{31-26} = opc;
+ let Inst{25-16} = a{9-0};
+ let Inst{15-10} = 0b111100;
+ let Inst{9-0} = a{19-10};
}
class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -80,6 +158,14 @@ class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
let DecoderMethod = "Decode2RInstruction";
}
+// 2R with first operand as an immediate. Used for TSETMR where the first
+// operand is treated as an immediate since it refers to a register number in
+// another thread.
+class _F2RImm<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : _F2R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "Decode2RImmInstruction";
+}
+
// 2R with first operand as both a source and a destination.
class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> {
@@ -148,14 +234,44 @@ class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{4-0} = opc{4-0};
}
-class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FL4R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ bits<4> d;
+
+ let Inst{31-27} = opc{5-1};
+ let Inst{26-21} = 0b111111;
+ let Inst{20} = opc{0};
+ let Inst{19-16} = d;
+ let Inst{15-11} = 0b11111;
}
-class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+// L4R with 4th operand as both a source and a destination.
+class _FL4RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstInstruction";
+}
+
+// L4R with 1st and 4th operand as both a source and a destination.
+class _FL4RSrcDstSrcDst<bits<6> opc, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : _FL4R<opc, outs, ins, asmstr, pattern> {
+ let DecoderMethod = "DecodeL4RSrcDstSrcDstInstruction";
+}
+
+class _FL5R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc{5-1};
+ let Inst{20} = opc{0};
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL5RInstruction";
}
-class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+class _FL6R<bits<5> opc, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstXCore<4, outs, ins, asmstr, pattern> {
+ let Inst{31-27} = opc;
+ let Inst{15-11} = 0b11111;
+
+ let DecoderMethod = "DecodeL6RInstruction";
}
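
The new formats mostly pin down field positions, but the long immediate layouts are worth spelling out: an lu6/lru6 word keeps the low six bits of the immediate in the same slot the short form uses (the bottom six bits of the first half-word, i.e. bits 21-16 of Inst), places the fixed 0b111100 prefix in bits 15-10, and stores the remaining ten bits in bits 9-0. The sketch below packs and unpacks a 16-bit immediate according to the _FLU6 layout above, using the retsp opcode (0b0111011111) from XCoreInstrInfo.td as the sample opcode; it is a reading aid for the TableGen, not generated from it.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack a _FLU6-style word: opc in bits 31-22, imm{5-0} in bits 21-16, the
// fixed 0b111100 marker in bits 15-10, and imm{15-6} in bits 9-0.
static uint32_t encodeLU6(uint32_t opc10, uint32_t imm16) {
  assert(opc10 < (1u << 10) && imm16 < (1u << 16));
  return (opc10 << 22) | ((imm16 & 0x3F) << 16) | (0x3Cu << 10) |
         ((imm16 >> 6) & 0x3FF);
}

// Recover the immediate from the two split fields.
static uint32_t decodeLU6Imm(uint32_t insn) {
  return ((insn >> 16) & 0x3F) | ((insn & 0x3FF) << 6);
}

int main() {
  uint32_t imm  = 0xABCD;                  // arbitrary 16-bit immediate
  uint32_t word = encodeLU6(0x1DF, imm);   // 0x1DF == 0b0111011111 (retsp)
  std::printf("word=0x%08x  imm back=0x%04x\n", word, decodeLU6Imm(word));
  assert(decodeLU6Imm(word) == imm);
  return 0;
}
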
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 95b076f..e140ef2 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -32,8 +32,8 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>;
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
+ [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad, SDNPVariadic]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -182,6 +182,7 @@ def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
// Address operands
def MEMii : Operand<i32> {
let PrintMethod = "printMemOperand";
+ let DecoderMethod = "DecodeMEMiiOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
@@ -200,146 +201,110 @@ def InlineJT32 : Operand<i32> {
// Three operand short
-multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass F3R_2RUS<bits<5> opc1, bits<5> opc2, string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
-multiclass F3R_2RUS_np<string OpcStr> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+multiclass F3R_2RUS_np<bits<5> opc1, bits<5> opc2, string OpcStr> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
+ def _2rus : _F2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
}
-multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
- def _3r: _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _2rus : _F2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass F3R_2RBITP<bits<5> opc1, bits<5> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _3r: _F3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class F3R<string OpcStr, SDNode OpNode> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class F3R<bits<5> opc, string OpcStr, SDNode OpNode> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
-class F3R_np<string OpcStr> : _F3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- []>;
+class F3R_np<bits<5> opc, string OpcStr> :
+ _F3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"), []>;
// Three operand long
/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
-multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+multiclass FL3R_L2RUS<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
}
/// FL3R_L2RBITP multiclass - Define a normal FL3R/FL2RUSBitp pattern in one shot.
-multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
- def _l3r: _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
- def _l2rus : _FL2RUS<
- (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+multiclass FL3R_L2RBITP<bits<9> opc1, bits<9> opc2, string OpcStr,
+ SDNode OpNode> {
+ def _l3r: _FL3R<opc1, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUSBitp<opc2, (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
}
-class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
- (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
- !strconcat(OpcStr, " $dst, $b, $c"),
- [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+class FL3R<bits<9> opc, string OpcStr, SDNode OpNode> :
+ _FL3R<opc, (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
// Register - U6
// Operand register - U6
-multiclass FRU6_LRU6_branch<string OpcStr> {
- def _ru6: _FRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
- def _lru6: _FLRU6<
- (outs), (ins GRRegs:$cond, brtarget:$dest),
- !strconcat(OpcStr, " $cond, $dest"),
- []>;
+multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
}
-multiclass FRU6_LRU6_cp<string OpcStr> {
- def _ru6: _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
- def _lru6: _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$a),
- !strconcat(OpcStr, " $dst, cp[$a]"),
- []>;
+multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
+ !strconcat(OpcStr, " $a, -$b"), []>;
}
-// U6
-multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(OpNode immU16:$b)]>;
+multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
+ def _ru6: _FRU6<opc, (outs GRRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
+ def _lru6: _FLRU6<opc, (outs GRRegs:$a), (ins i32imm:$b),
+ !strconcat(OpcStr, " $a, cp[$b]"), []>;
}
-multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU6:$b)]>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- [(Int immU16:$b)]>;
+
+// U6
+multiclass FU6_LU6<bits<10> opc, string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(OpNode immU16:$a)]>;
}
-multiclass FU6_LU6_np<string OpcStr> {
- def _u6: _FU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu6: _FLU6<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_int<bits<10> opc, string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU6:$a)]>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"),
+ [(Int immU16:$a)]>;
}
-// U10
-multiclass FU10_LU10_np<string OpcStr> {
- def _u10: _FU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
- def _lu10: _FLU10<
- (outs), (ins i32imm:$b),
- !strconcat(OpcStr, " $b"),
- []>;
+multiclass FU6_LU6_np<bits<10> opc, string OpcStr> {
+ def _u6: _FU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
+ def _lu6: _FLU6<opc, (outs), (ins i32imm:$a), !strconcat(OpcStr, " $a"), []>;
}
// Two operand short
@@ -390,368 +355,351 @@ let usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
// Three operand short
-defm ADD : F3R_2RUS<"add", add>;
-defm SUB : F3R_2RUS<"sub", sub>;
+defm ADD : F3R_2RUS<0b00010, 0b10010, "add", add>;
+defm SUB : F3R_2RUS<0b00011, 0b10011, "sub", sub>;
let neverHasSideEffects = 1 in {
-defm EQ : F3R_2RUS_np<"eq">;
-def LSS_3r : F3R_np<"lss">;
-def LSU_3r : F3R_np<"lsu">;
+defm EQ : F3R_2RUS_np<0b00110, 0b10110, "eq">;
+def LSS_3r : F3R_np<0b11000, "lss">;
+def LSU_3r : F3R_np<0b11001, "lsu">;
}
-def AND_3r : F3R<"and", and>;
-def OR_3r : F3R<"or", or>;
+def AND_3r : F3R<0b00111, "and", and>;
+def OR_3r : F3R<0b01000, "or", or>;
let mayLoad=1 in {
-def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_3r : _F3R<0b01001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
- "ldw $dst, $addr[$offset]",
- []>;
+def LDW_2rus : _F2RUS<0b00001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]", []>;
-def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld16s $dst, $addr[$offset]",
- []>;
+def LD16S_3r : _F3R<0b10000, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]", []>;
-def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ld8u $dst, $addr[$offset]",
- []>;
+def LD8U_3r : _F3R<0b10001, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]", []>;
}
let mayStore=1 in {
-def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_l3r : _FL3R<0b000001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]", []>;
-def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
- "stw $val, $addr[$offset]",
- []>;
+def STW_2rus : _F2RUS<0b0000, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]", []>;
}
-defm SHL : F3R_2RBITP<"shl", shl>;
-defm SHR : F3R_2RBITP<"shr", srl>;
-// TODO tsetr
+defm SHL : F3R_2RBITP<0b00100, 0b10100, "shl", shl>;
+defm SHR : F3R_2RBITP<0b00101, 0b10101, "shr", srl>;
+
+// The first operand is treated as an immediate since it refers to a register
+// number in another thread.
+def TSETR_3r : _F3RImm<0b10111, (outs), (ins i32imm:$a, GRRegs:$b, GRRegs:$c),
+ "set t[$c]:r$a, $b", []>;
// Three operand long
-def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[$offset]",
- [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWF_l3r : _FL3R<0b000111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (ldawf GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[$offset]",
- []>;
+def LDAWF_l2rus : _FL2RUS<0b100111100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]", []>;
-def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "ldaw $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+def LDAWB_l3r : _FL3R<0b001001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (ldawb GRRegs:$addr, GRRegs:$offset))]>;
let neverHasSideEffects = 1 in
-def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
- (ins GRRegs:$addr, i32imm:$offset),
- "ldaw $dst, $addr[-$offset]",
- []>;
-
-def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[$offset]",
- [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
-
-def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
- "lda16 $dst, $addr[-$offset]",
- [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
-
-def MUL_l3r : FL3R<"mul", mul>;
+def LDAWB_l2rus : _FL2RUS<0b101001100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]", []>;
+
+def LDA16F_l3r : _FL3R<0b001011100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst,
+ (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<0b001101100, (outs GRRegs:$dst),
+ (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst,
+ (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<0b001111100, "mul", mul>;
// Instructions which may trap are marked as side effecting.
let hasSideEffects = 1 in {
-def DIVS_l3r : FL3R<"divs", sdiv>;
-def DIVU_l3r : FL3R<"divu", udiv>;
-def REMS_l3r : FL3R<"rems", srem>;
-def REMU_l3r : FL3R<"remu", urem>;
+def DIVS_l3r : FL3R<0b010001100, "divs", sdiv>;
+def DIVU_l3r : FL3R<0b010011100, "divu", udiv>;
+def REMS_l3r : FL3R<0b110001100, "rems", srem>;
+def REMU_l3r : FL3R<0b110011100, "remu", urem>;
}
-def XOR_l3r : FL3R<"xor", xor>;
-defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+def XOR_l3r : FL3R<0b000011100, "xor", xor>;
+defm ASHR : FL3R_L2RBITP<0b000101100, 0b100101100, "ashr", sra>;
let Constraints = "$src1 = $dst" in
-def CRC_l3r : _FL3R<(outs GRRegs:$dst),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc32 $dst, $src2, $src3",
- [(set GRRegs:$dst,
- (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
- GRRegs:$src3))]>;
+def CRC_l3r : _FL3RSrcDst<0b101011100, (outs GRRegs:$dst),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "crc32 $dst, $src2, $src3",
+ [(set GRRegs:$dst,
+ (int_xcore_crc32 GRRegs:$src1, GRRegs:$src2,
+ GRRegs:$src3))]>;
-// TODO inpw, outpw
let mayStore=1 in {
-def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st16 $val, $addr[$offset]",
- []>;
+def ST16_l3r : _FL3R<0b100001100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]", []>;
-def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
- "st8 $val, $addr[$offset]",
- []>;
+def ST8_l3r : _FL3R<0b100011100, (outs),
+ (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]", []>;
}
-// Four operand long
-let Constraints = "$src1 = $dst1,$src2 = $dst2" in {
-def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccu $dst1, $dst2, $src3, $src4",
- []>;
+def INPW_l2rus : _FL2RUSBitp<0b100101110, (outs GRRegs:$a),
+ (ins GRRegs:$b, i32imm:$c), "inpw $a, res[$b], $c",
+ []>;
-def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "maccs $dst1, $dst2, $src3, $src4",
- []>;
+def OUTPW_l2rus : _FL2RUSBitp<0b100101101, (outs),
+ (ins GRRegs:$a, GRRegs:$b, i32imm:$c),
+ "outpw res[$b], $a, $c", []>;
+
+// Four operand long
+let Constraints = "$e = $a,$f = $b" in {
+def MACCU_l4r : _FL4RSrcDstSrcDst<
+ 0b000001, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccu $a, $b, $c, $d", []>;
+
+def MACCS_l4r : _FL4RSrcDstSrcDst<
+ 0b000010, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$f, GRRegs:$c, GRRegs:$d), "maccs $a, $b, $c, $d", []>;
}
-let Constraints = "$src1 = $dst1" in
-def CRC8_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "crc8 $dst1, $dst2, $src2, $src3",
- []>;
+let Constraints = "$e = $b" in
+def CRC8_l4r : _FL4RSrcDst<0b000000, (outs GRRegs:$a, GRRegs:$b),
+ (ins GRRegs:$e, GRRegs:$c, GRRegs:$d),
+ "crc8 $b, $a, $c, $d", []>;
// Five operand long
-def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ladd $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LADD_l5r : _FL5R<0b000001, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst2, $dst1, $src1, $src2, $src3",
+ []>;
-def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "lsub $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LSUB_l5r : _FL5R<0b000010, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst2, $dst1, $src1, $src2, $src3", []>;
-def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
- "ldiv $dst1, $dst2, $src1, $src2, $src3",
- []>;
+def LDIVU_l5r : _FL5R<0b000000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldivu $dst1, $dst2, $src3, $src1, $src2", []>;
// Six operand long
-def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
- (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
- GRRegs:$src4),
- "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
- []>;
+def LMUL_l6r : _FL6R<
+ 0b00000, (outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>;
// Register - U6
//let Uses = [DP] in ...
let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- []>;
+def LDAWDP_ru6: _FRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]", []>;
let isReMaterializable = 1 in
-def LDAWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldaw $dst, dp[$a]",
- [(set GRRegs:$dst, ADDRdpii:$a)]>;
+def LDAWDP_lru6: _FLRU6<0b011000, (outs GRRegs:$a), (ins MEMii:$b),
+ "ldaw $a, dp[$b]",
+ [(set GRRegs:$a, ADDRdpii:$b)]>;
let mayLoad=1 in
-def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- []>;
-
-def LDWDP_lru6: _FLRU6<
- (outs GRRegs:$dst), (ins MEMii:$a),
- "ldw $dst, dp[$a]",
- [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+def LDWDP_ru6: _FRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]", []>;
+
+def LDWDP_lru6: _FLRU6<0b010110, (outs GRRegs:$a), (ins MEMii:$b),
+ "ldw $a, dp[$b]",
+ [(set GRRegs:$a, (load ADDRdpii:$b))]>;
let mayStore=1 in
-def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- []>;
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]", []>;
-def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
- "stw $val, dp[$addr]",
- [(store GRRegs:$val, ADDRdpii:$addr)]>;
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins GRRegs:$a, MEMii:$b),
+ "stw $a, dp[$b]",
+ [(store GRRegs:$a, ADDRdpii:$b)]>;
//let Uses = [CP] in ..
let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
-defm LDWCP : FRU6_LRU6_cp<"ldw">;
+defm LDWCP : FRU6_LRU6_cp<0b011011, "ldw">;
let Uses = [SP] in {
let mayStore=1 in {
-def STWSP_ru6 : _FRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
-
-def STWSP_lru6 : _FLRU6<
- (outs), (ins GRRegs:$val, i32imm:$index),
- "stw $val, sp[$index]",
- [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+def STWSP_ru6 : _FRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp GRRegs:$a, immU6:$b)]>;
+
+def STWSP_lru6 : _FLRU6<0b010101, (outs), (ins GRRegs:$a, i32imm:$b),
+ "stw $a, sp[$b]",
+ [(XCoreStwsp GRRegs:$a, immU16:$b)]>;
}
let mayLoad=1 in {
-def LDWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_ru6 : _FRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
-def LDWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldw $dst, sp[$b]",
- []>;
+def LDWSP_lru6 : _FLRU6<0b010111, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldw $a, sp[$b]", []>;
}
let neverHasSideEffects = 1 in {
-def LDAWSP_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_ru6 : _FRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
-def LDAWSP_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+def LDAWSP_lru6 : _FLRU6<0b011001, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
-def LDAWSP_ru6_RRegs : _FRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+let isCodeGenOnly = 1 in
+def LDAWSP_ru6_RRegs : _FRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
-def LDAWSP_lru6_RRegs : _FLRU6<
- (outs RRegs:$dst), (ins i32imm:$b),
- "ldaw $dst, sp[$b]",
- []>;
+let isCodeGenOnly = 1 in
+def LDAWSP_lru6_RRegs : _FLRU6<0b011001, (outs RRegs:$a), (ins i32imm:$b),
+ "ldaw $a, sp[$b]", []>;
}
}
let isReMaterializable = 1 in {
-def LDC_ru6 : _FRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU6:$b)]>;
-
-def LDC_lru6 : _FLRU6<
- (outs GRRegs:$dst), (ins i32imm:$b),
- "ldc $dst, $b",
- [(set GRRegs:$dst, immU16:$b)]>;
+def LDC_ru6 : _FRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set GRRegs:$a, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<0b011010, (outs GRRegs:$a), (ins i32imm:$b),
+ "ldc $a, $b", [(set GRRegs:$a, immU16:$b)]>;
}
-def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+def SETC_ru6 : _FRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU6:$b)]>;
-def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
- "setc res[$r], $val",
- [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+def SETC_lru6 : _FLRU6<0b111010, (outs), (ins GRRegs:$a, i32imm:$b),
+ "setc res[$a], $b",
+ [(int_xcore_setc GRRegs:$a, immU16:$b)]>;
// Operand register - U6
let isBranch = 1, isTerminator = 1 in {
-defm BRFT: FRU6_LRU6_branch<"bt">;
-defm BRBT: FRU6_LRU6_branch<"bt">;
-defm BRFF: FRU6_LRU6_branch<"bf">;
-defm BRBF: FRU6_LRU6_branch<"bf">;
+defm BRFT: FRU6_LRU6_branch<0b011100, "bt">;
+defm BRBT: FRU6_LRU6_backwards_branch<0b011101, "bt">;
+defm BRFF: FRU6_LRU6_branch<0b011110, "bf">;
+defm BRBF: FRU6_LRU6_backwards_branch<0b011111, "bf">;
}
// U6
let Defs = [SP], Uses = [SP] in {
let neverHasSideEffects = 1 in
-defm EXTSP : FU6_LU6_np<"extsp">;
+defm EXTSP : FU6_LU6_np<0b0111011110, "extsp">;
+
let mayStore = 1 in
-defm ENTSP : FU6_LU6_np<"entsp">;
+defm ENTSP : FU6_LU6_np<0b0111011101, "entsp">;
let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
-defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+defm RETSP : FU6_LU6<0b0111011111, "retsp", XCoreRetsp>;
}
}
-// TODO extdp, kentsp, krestsp, blat
-// getsr, kalli
+let neverHasSideEffects = 1 in
+defm EXTDP : FU6_LU6_np<0b0111001110, "extdp">;
+
+let Uses = [R11], isCall=1 in
+defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
+
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def BRBU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRBU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
-def BRFU_u6 : _FU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
-def BRFU_lu6 : _FLU6<
- (outs),
- (ins brtarget:$target),
- "bu $target",
- []>;
+def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
}
//let Uses = [CP] in ...
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
[]>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAWCP_lu6: _FLRU6<
- (outs), (ins MEMii:$a),
- "ldaw r11, cp[$a]",
- [(set R11, ADDRcpii:$a)]>;
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
-defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+let Defs = [R11] in
+defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
-defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+defm SETSR : FU6_LU6_int<0b0111101101, "setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<0b0111101100, "clrsr", int_xcore_clrsr>;
// setsr may cause a branch if it is used to enable events. clrsr may
// branch if it is executed while events are enabled.
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
-defm SETSR_branch : FU6_LU6_np<"setsr">;
-defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ isCodeGenOnly = 1 in {
+defm SETSR_branch : FU6_LU6_np<0b0111101101, "setsr">;
+defm CLRSR_branch : FU6_LU6_np<0b0111101100, "clrsr">;
}
+defm KCALL : FU6_LU6_np<0b0111001111, "kcall">;
+
+let Uses = [SP], Defs = [SP], mayStore = 1 in
+defm KENTSP : FU6_LU6_np<0b0111101110, "kentsp">;
+
+let Uses = [SP], Defs = [SP], mayLoad = 1 in
+defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
+
// U10
-// TODO ldwcpl, blacp
let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
-def LDAP_u10 : _FU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- []>;
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10 : _FLU10<
- (outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let Defs = [R11], isReMaterializable = 1 in
-def LDAP_lu10_ba : _FLU10<(outs),
- (ins i32imm:$addr),
- "ldap r11, $addr",
- [(set R11, (pcrelwrapper tblockaddress:$addr))]>;
+let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tblockaddress:$a))]>;
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in {
-def BL_u10 : _FU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU10:$target)]>;
-
-def BL_lu10 : _FLU10<
- (outs), (ins calltarget:$target),
- "bl $target",
- [(XCoreBranchLink immU20:$target)]>;
+def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
+
+def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU10:$a)]>;
+
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+ [(XCoreBranchLink immU20:$a)]>;
+}
+
+let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
+ neverHasSideEffects = 1 in {
+def LDWCP_u10 : _FU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]", []>;
+
+def LDWCP_lu10 : _FLU10<0b111001, (outs), (ins i32imm:$a), "ldw r11, cp[$a]",
+ []>;
}
// Two operand short
-// TODO eet, eef, tsetmr
def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b),
"not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>;
@@ -867,9 +815,9 @@ def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val),
"setd res[$r], $val",
[(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
-def SETPSC_l2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
- "setpsc res[$src1], $src2",
- [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+def SETPSC_2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r),
"getst $dst, res[$r]",
@@ -899,8 +847,16 @@ def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), (ins GRRegs:$src),
"endin $dst, res[$src]",
[(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>;
+def EEF_2r : _F2R<0b001011, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eef $a, res[$b]", []>;
+
+def EET_2r : _F2R<0b001001, (outs), (ins GRRegs:$a, GRRegs:$b),
+ "eet $a, res[$b]", []>;
+
+def TSETMR_2r : _F2RImm<0b000111, (outs), (ins i32imm:$a, GRRegs:$b),
+ "tsetmr r$a, $b", []>;
+
// Two operand long
-// getd, testlcl
def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src),
"bitrev $dst, $src",
[(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
@@ -913,6 +869,12 @@ def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src),
"clz $dst, $src",
[(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+def GETD_l2r : _FL2R<0b0001111001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getd $dst, res[$src]", []>;
+
+def GETN_l2r : _FL2R<0b0011011001, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "getn $dst, res[$src]", []>;
+
def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val),
"setc res[$r], $val",
[(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
@@ -937,14 +899,17 @@ def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2),
"setclk res[$src1], $src2",
[(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+def SETN_l2r : _FLR2R<0b0011011000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setn res[$src1], $src2", []>;
+
def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2),
"setrdy res[$src1], $src2",
[(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+def TESTLCL_l2r : _FL2R<0b0010011000, (outs GRRegs:$dst), (ins GRRegs:$src),
+ "testlcl $dst, res[$src]", []>;
+
// One operand short
-// TODO edu, eeu, waitet, waitef, tstart, clrtp
-// setdp, setcp, setev, kcall
-// dgetreg
def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a),
"msync res[$a]",
[(int_xcore_msync GRRegs:$a)]>;
@@ -968,9 +933,13 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
[(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
let Defs=[SP], neverHasSideEffects=1 in
-def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a),
- "set sp, $a",
- []>;
+def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), "set sp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETDP_1r : _F1R<0b001100, (outs), (ins GRRegs:$a), "set dp, $a", []>;
+
+let neverHasSideEffects=1 in
+def SETCP_1r : _F1R<0b001101, (outs), (ins GRRegs:$a), "set cp, $a", []>;
let hasCtrlDep = 1 in
def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a),
@@ -1008,17 +977,40 @@ def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a),
[(int_xcore_setev GRRegs:$a, R11)]>;
}
+def DGETREG_1r : _F1R<0b001110, (outs GRRegs:$a), (ins), "dgetreg $a", []>;
+
+def EDU_1r : _F1R<0b000000, (outs), (ins GRRegs:$a), "edu res[$a]", []>;
+
def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a),
"eeu res[$a]",
[(int_xcore_eeu GRRegs:$a)]>;
+def KCALL_1r : _F1R<0b010000, (outs), (ins GRRegs:$a), "kcall $a", []>;
+
+def WAITEF_1R : _F1R<0b000011, (outs), (ins GRRegs:$a), "waitef $a", []>;
+
+def WAITET_1R : _F1R<0b000010, (outs), (ins GRRegs:$a), "waitet $a", []>;
+
+def TSTART_1R : _F1R<0b000110, (outs), (ins GRRegs:$a), "start t[$a]", []>;
+
+def CLRPT_1R : _F1R<0b100000, (outs), (ins GRRegs:$a), "clrpt res[$a]", []>;
+
// Zero operand short
-// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
-// stet, getkep, getksp, setkep, getid, kret, dcall, dret,
-// dentsp, drestsp
def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>;
+def DCALL_0R : _F0R<0b0000011100, (outs), (ins), "dcall", []>;
+
+let Defs = [SP], Uses = [SP] in
+def DENTSP_0R : _F0R<0b0001001100, (outs), (ins), "dentsp", []>;
+
+let Defs = [SP] in
+def DRESTSP_0R : _F0R<0b0001001101, (outs), (ins), "drestsp", []>;
+
+def DRET_0R : _F0R<0b0000011110, (outs), (ins), "dret", []>;
+
+def FREET_0R : _F0R<0b0000001111, (outs), (ins), "freet", []>;
+
let Defs = [R11] in {
def GETID_0R : _F0R<0b0001001110, (outs), (ins),
"get r11, id",
@@ -1031,12 +1023,44 @@ def GETED_0R : _F0R<0b0000111110, (outs), (ins),
def GETET_0R : _F0R<0b0000111111, (outs), (ins),
"get r11, et",
[(set R11, (int_xcore_getet))]>;
+
+def GETKEP_0R : _F0R<0b0001001111, (outs), (ins),
+ "get r11, kep", []>;
+
+def GETKSP_0R : _F0R<0b0001011100, (outs), (ins),
+ "get r11, ksp", []>;
}
+let Defs = [SP] in
+def KRET_0R : _F0R<0b0000011101, (outs), (ins), "kret", []>;
+
+let Uses = [SP], mayLoad = 1 in {
+def LDET_0R : _F0R<0b0001011110, (outs), (ins), "ldw et, sp[4]", []>;
+
+def LDSED_0R : _F0R<0b0001011101, (outs), (ins), "ldw sed, sp[3]", []>;
+
+def LDSPC_0R : _F0R<0b0000101100, (outs), (ins), "ldw spc, sp[1]", []>;
+
+def LDSSR_0R : _F0R<0b0000101110, (outs), (ins), "ldw ssr, sp[2]", []>;
+}
+
+let Uses=[R11] in
+def SETKEP_0R : _F0R<0b0000011111, (outs), (ins), "set kep, r11", []>;
+
def SSYNC_0r : _F0R<0b0000001110, (outs), (ins),
"ssync",
[(int_xcore_ssync)]>;
+let Uses = [SP], mayStore = 1 in {
+def STET_0R : _F0R<0b0000111101, (outs), (ins), "stw et, sp[4]", []>;
+
+def STSED_0R : _F0R<0b0000111100, (outs), (ins), "stw sed, sp[3]", []>;
+
+def STSPC_0R : _F0R<0b0000101101, (outs), (ins), "stw spc, sp[1]", []>;
+
+def STSSR_0R : _F0R<0b0000101111, (outs), (ins), "stw ssr, sp[2]", []>;
+}
+
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
@@ -1047,8 +1071,8 @@ def WAITEU_0R : _F0R<0b0000001100, (outs), (ins),
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
-def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BLRF_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BLRF_lu10 texternalsym:$addr)>;
/// sext_inreg
def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
@@ -1090,7 +1114,7 @@ def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
(ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
- (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+ (STW_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
(STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
def : Pat<(store GRRegs:$val, GRRegs:$addr),
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index e637d9a..49b5634 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -101,72 +101,14 @@ XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
return false;
}
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void XCoreRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
- // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
-
- assert(Amount%4 == 0);
- Amount /= 4;
-
- bool isU6 = isImmU6(Amount);
- if (!isU6 && !isImmU16(Amount)) {
- // FIX could emit multiple instructions in this case.
-#ifndef NDEBUG
- errs() << "eliminateCallFramePseudoInstr size too big: "
- << Amount << "\n";
-#endif
- llvm_unreachable(0);
- }
-
- MachineInstr *New;
- if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
- int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
- int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
- .addImm(Amount);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
- unsigned i = 0;
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &FrameOp = MI.getOperand(i);
+ MachineOperand &FrameOp = MI.getOperand(FIOperandNum);
int FrameIndex = FrameOp.getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
@@ -190,14 +132,14 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of DBG_VALUE instructions.
if (MI.isDebugValue()) {
- MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
// fold constant into offset.
- Offset += MI.getOperand(i + 1).getImm();
- MI.getOperand(i + 1).ChangeToImmediate(0);
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
assert(Offset%4 == 0 && "Misaligned stack offset");
@@ -231,7 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::STWFI:
- BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r))
.addReg(Reg, getKillRegState(isKill))
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index c4dcb6b..1db3248 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,12 +54,9 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
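
The header change above tracks the updated TargetRegisterInfo::eliminateFrameIndex() interface, in which the generic frame-index elimination code passes the position of the frame-index operand (FIOperandNum) directly, so the per-target operand scan removed from XCoreRegisterInfo.cpp is no longer needed. Below is a minimal sketch of that usage pattern, assuming only the MachineInstr/MachineOperand calls already visible in the diff; the helper name is hypothetical and is not part of the XCore backend or of this patch.

// Illustrative sketch only -- not part of this patch.
#include <cassert>
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Hypothetical helper showing how an eliminateFrameIndex() override can
// consume the new FIOperandNum parameter instead of searching the operand
// list for the frame index.
static void rewriteFrameIndexOperand(MachineInstr &MI, unsigned FIOperandNum,
                                     unsigned FrameReg, int Offset) {
  // The caller now identifies the frame-index operand for us.
  assert(MI.getOperand(FIOperandNum).isFI() &&
         "Expected a frame-index operand at FIOperandNum");

  // Replace the frame index with the resolved base register and fold the
  // computed offset into the immediate operand that follows it, mirroring
  // the DBG_VALUE handling in XCoreRegisterInfo::eliminateFrameIndex above.
  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}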